c5ea044f79
Ability to set log level, lots of debugging messages Turn on debug logging during tests, simplified test cases for HTML ripper Fix fusktator ripper, added test Fixed gifyo, added test Added tests for *all* rippers Adding a few album-guessing URLs
214 lines
7.5 KiB
Java
214 lines
7.5 KiB
Java
package com.rarchives.ripme.ripper.rippers;
|
|
|
|
import java.io.IOException;
|
|
import java.net.MalformedURLException;
|
|
import java.net.URL;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
import org.json.JSONArray;
|
|
import org.json.JSONObject;
|
|
import org.jsoup.Connection.Response;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.select.Elements;
|
|
|
|
import com.rarchives.ripme.ripper.AlbumRipper;
|
|
import com.rarchives.ripme.utils.Http;
|
|
|
|
public class PhotobucketRipper extends AlbumRipper {
|
|
|
|
private static final String DOMAIN = "photobucket.com",
|
|
HOST = "photobucket";
|
|
|
|
private Response pageResponse = null;
|
|
|
|
public PhotobucketRipper(URL url) throws IOException {
|
|
super(url);
|
|
}
|
|
|
|
@Override
|
|
public String getHost() {
|
|
return HOST;
|
|
}
|
|
|
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
|
logger.info(url);
|
|
String u = url.toExternalForm();
|
|
if (u.contains("?")) {
|
|
u = u.substring(0, u.indexOf("?"));
|
|
return new URL(u);
|
|
}
|
|
else {
|
|
return url;
|
|
}
|
|
}
|
|
|
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
|
try {
|
|
// Attempt to use album title as GID
|
|
if (pageResponse == null) {
|
|
pageResponse = Http.url(url).response();
|
|
}
|
|
Document albumDoc = pageResponse.parse();
|
|
Elements els = albumDoc.select("div.libraryTitle > h1");
|
|
if (els.size() == 0) {
|
|
throw new IOException("Could not find libraryTitle at " + url);
|
|
}
|
|
return els.get(0).text();
|
|
} catch (IOException e) {
|
|
// Fall back to default album naming convention
|
|
}
|
|
return super.getAlbumTitle(url);
|
|
}
|
|
|
|
@Override
|
|
public String getGID(URL url) throws MalformedURLException {
|
|
Pattern p; Matcher m;
|
|
|
|
// http://s844.photobucket.com/user/SpazzySpizzy/library/Lady%20Gaga?sort=3&page=1
|
|
p = Pattern.compile("^https?://[a-zA-Z0-9]+\\.photobucket\\.com/user/([a-zA-Z0-9_\\-]+)/library.*$");
|
|
m = p.matcher(url.toExternalForm());
|
|
if (m.matches()) {
|
|
return m.group(1);
|
|
}
|
|
|
|
throw new MalformedURLException(
|
|
"Expected photobucket.com gallery formats: "
|
|
+ "http://x###.photobucket.com/username/library/..."
|
|
+ " Got: " + url);
|
|
}
|
|
|
|
@Override
|
|
public void rip() throws IOException {
|
|
List<String> subalbums = ripAlbumAndGetSubalbums(this.url.toExternalForm());
|
|
|
|
List<String> subsToRip = new ArrayList<String>(),
|
|
rippedSubs = new ArrayList<String>();
|
|
|
|
for (String sub : subalbums) {
|
|
subsToRip.add(sub);
|
|
}
|
|
|
|
while (subsToRip.size() > 0 && !isStopped()) {
|
|
try {
|
|
Thread.sleep(1000);
|
|
} catch (InterruptedException e) {
|
|
break;
|
|
}
|
|
String nextSub = subsToRip.remove(0);
|
|
rippedSubs.add(nextSub);
|
|
logger.info("Attempting to rip next subalbum: " + nextSub);
|
|
try {
|
|
pageResponse = null;
|
|
subalbums = ripAlbumAndGetSubalbums(nextSub);
|
|
} catch (IOException e) {
|
|
logger.error("Error while ripping " + nextSub, e);
|
|
break;
|
|
}
|
|
for (String subalbum : subalbums) {
|
|
if (!subsToRip.contains(subalbum) && !rippedSubs.contains(subalbum)) {
|
|
subsToRip.add(subalbum);
|
|
}
|
|
}
|
|
}
|
|
waitForThreads();
|
|
}
|
|
|
|
public List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException {
|
|
int filesIndex = 0,
|
|
filesTotal = 0,
|
|
pageIndex = 0;
|
|
String currentAlbumPath = null,
|
|
url = null;
|
|
|
|
while (pageIndex == 0 || filesIndex < filesTotal) {
|
|
if (isStopped()) {
|
|
break;
|
|
}
|
|
pageIndex++;
|
|
if (pageIndex > 1 || pageResponse == null) {
|
|
url = theUrl + String.format("?sort=3&page=%d", pageIndex);
|
|
logger.info(" Retrieving " + url);
|
|
pageResponse = Http.url(url).response();
|
|
}
|
|
Document albumDoc = pageResponse.parse();
|
|
// Retrieve JSON from request
|
|
String jsonString = null;
|
|
for (Element script : albumDoc.select("script[type=text/javascript]")) {
|
|
String data = script.data();
|
|
// Ensure this chunk of javascript contains the album info
|
|
if (!data.contains("libraryAlbumsPageCollectionData")) {
|
|
continue;
|
|
}
|
|
// Grab the JSON
|
|
Pattern p; Matcher m;
|
|
p = Pattern.compile("^.*collectionData: (\\{.*\\}).*$", Pattern.DOTALL);
|
|
m = p.matcher(data);
|
|
if (m.matches()) {
|
|
jsonString = m.group(1);
|
|
break;
|
|
}
|
|
}
|
|
if (jsonString == null) {
|
|
logger.error("Unable to find JSON data at URL: " + url);
|
|
break;
|
|
}
|
|
JSONObject json = new JSONObject(jsonString);
|
|
JSONObject items = json.getJSONObject("items");
|
|
JSONArray objects = items.getJSONArray("objects");
|
|
filesTotal = items.getInt("total");
|
|
currentAlbumPath = json.getString("currentAlbumPath");
|
|
for (int i = 0; i < objects.length(); i++) {
|
|
JSONObject object = objects.getJSONObject(i);
|
|
String image = object.getString("fullsizeUrl");
|
|
filesIndex += 1;
|
|
addURLToDownload(new URL(image),
|
|
"",
|
|
object.getString("location").replaceAll(" ", "_"),
|
|
albumDoc.location(),
|
|
pageResponse.cookies());
|
|
}
|
|
}
|
|
// Get subalbums
|
|
if (url != null) {
|
|
return getSubAlbums(url, currentAlbumPath);
|
|
} else {
|
|
return new ArrayList<String>();
|
|
}
|
|
}
|
|
|
|
private List<String> getSubAlbums(String url, String currentAlbumPath) {
|
|
List<String> result = new ArrayList<String>();
|
|
String subdomain = url.substring(url.indexOf("://")+3);
|
|
subdomain = subdomain.substring(0, subdomain.indexOf("."));
|
|
String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList"
|
|
+ "?deferCollapsed=true"
|
|
+ "&albumPath=" + currentAlbumPath // %2Falbums%2Fab10%2FSpazzySpizzy"
|
|
+ "&json=1";
|
|
try {
|
|
logger.info("Loading " + apiUrl);
|
|
JSONObject json = Http.url(apiUrl).getJSON();
|
|
JSONArray subalbums = json.getJSONObject("body").getJSONArray("subAlbums");
|
|
for (int i = 0; i < subalbums.length(); i++) {
|
|
String suburl =
|
|
"http://"
|
|
+ subdomain
|
|
+ ".photobucket.com"
|
|
+ subalbums.getJSONObject(i).getString("path");
|
|
suburl = suburl.replace(" ", "%20");
|
|
result.add(suburl);
|
|
}
|
|
} catch (IOException e) {
|
|
logger.error("Failed to get subalbums from " + apiUrl, e);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
public boolean canRip(URL url) {
|
|
return url.getHost().endsWith(DOMAIN);
|
|
}
|
|
|
|
} |