Imgur ripper overhaul, image titles saved to filename #17

The album title is still not used as the directory name.
This commit is contained in:
4pr0n 2014-04-12 16:53:49 -07:00
parent 1efac50834
commit 3a2dcdb4ec
3 changed files with 98 additions and 24 deletions

View File

@ -1,5 +1,6 @@
package com.rarchives.ripme.ripper.rippers;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
@ -19,6 +20,7 @@ import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;
public class ImgurRipper extends AbstractRipper {
@ -42,11 +44,6 @@ public class ImgurRipper extends AbstractRipper {
SLEEP_BETWEEN_ALBUMS = 1;
}
/**
 * Logs the given URL at debug level, then passes it, together with the
 * prefix and subdirectory, on to {@code addURLToDownload}.
 *
 * @param url          the URL that was found
 * @param prefix       forwarded unchanged to {@code addURLToDownload}
 * @param subdirectory forwarded unchanged to {@code addURLToDownload}
 */
public void processURL(URL url, String prefix, String subdirectory) {
    final String foundMessage = "Found URL: " + url;
    logger.debug(foundMessage);
    addURLToDownload(url, prefix, subdirectory);
}
public boolean canRip(URL url) {
if (!url.getHost().endsWith(DOMAIN)) {
return false;
@ -84,7 +81,6 @@ public class ImgurRipper extends AbstractRipper {
break;
case USER:
// TODO Get all albums by user
ripUserAccount(url);
break;
case SUBREDDIT:
@ -102,15 +98,25 @@ public class ImgurRipper extends AbstractRipper {
int index = 0;
this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
index = 0;
for (URL singleURL : getURLsFromAlbum(url)) {
ImgurAlbum album = getImgurAlbum(url);
for (ImgurImage imgurImage : album.images) {
String saveAs = workingDir.getCanonicalPath();
if (!saveAs.endsWith(File.separator)) {
saveAs += File.separator;
}
if (subdirectory != null && !subdirectory.equals("")) {
saveAs += subdirectory;
}
if (!saveAs.endsWith(File.separator)) {
saveAs += File.separator;
}
index += 1;
processURL(singleURL, String.format("%03d_", index), subdirectory);
saveAs += String.format("%03d_%s", index, imgurImage.getSaveAs());
addURLToDownload(imgurImage.url, new File(saveAs));
}
}
public static List<URL> getURLsFromAlbum(URL url) throws IOException {
List<URL> result = new ArrayList<URL>();
public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
logger.info(" Retrieving " + url.toExternalForm());
Document doc = Jsoup.connect(url.toExternalForm())
.userAgent(USER_AGENT)
@ -122,6 +128,8 @@ public class ImgurRipper extends AbstractRipper {
if (m.matches()) {
try {
JSONObject json = new JSONObject(m.group(1));
JSONObject jsonAlbum = json.getJSONObject("album");
ImgurAlbum imgurAlbum = new ImgurAlbum(url, jsonAlbum.getString("title_clean"));
JSONArray images = json.getJSONObject("images").getJSONArray("items");
int imagesLength = images.length();
for (int i = 0; i < imagesLength; i++) {
@ -131,9 +139,12 @@ public class ImgurRipper extends AbstractRipper {
"http://i.imgur.com/"
+ image.get("hash")
+ image.get("ext"));
result.add(imageURL);
ImgurImage imgurImage = new ImgurImage(imageURL,
image.getString("title"),
image.getString("description"));
imgurAlbum.addImage(imgurImage);
}
return result;
return imgurAlbum;
} catch (JSONException e) {
logger.debug("Error while parsing JSON at " + url + ", continuing", e);
}
@ -142,19 +153,22 @@ public class ImgurRipper extends AbstractRipper {
m = p.matcher(doc.body().html());
if (m.matches()) {
try {
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
JSONObject json = new JSONObject(m.group(1));
JSONArray images = json.getJSONArray("hashes");
int imagesLength = images.length();
for (int i = 0; i < imagesLength; i++) {
JSONObject image = images.getJSONObject(i);
URL imageURL = new URL(
"http:" + json.get("cdnUrl")
"http:" + json.getString("cdnUrl")
+ "/"
+ image.get("hash")
+ image.get("ext"));
result.add(imageURL);
+ image.getString("hash")
+ image.getString("ext"));
ImgurImage imgurImage = new ImgurImage(imageURL);
imgurImage.extension = image.getString("ext");
imgurAlbum.addImage(imgurImage);
}
return result;
return imgurAlbum;
} catch (JSONException e) {
logger.debug("Error while parsing JSON at " + url + ", continuing", e);
}
@ -174,6 +188,7 @@ public class ImgurRipper extends AbstractRipper {
// Fall back to parsing HTML elements
// NOTE: This does not always get the highest-resolution images!
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
for (Element thumb : doc.select("div.image")) {
String image;
if (thumb.select("a.zoom").size() > 0) {
@ -186,9 +201,10 @@ public class ImgurRipper extends AbstractRipper {
logger.error("[!] Unable to find image in div: " + thumb.toString());
continue;
}
result.add(new URL(image));
ImgurImage imgurImage = new ImgurImage(new URL(image));
imgurAlbum.addImage(imgurImage);
}
return result;
return imgurAlbum;
}
/**
@ -318,4 +334,54 @@ public class ImgurRipper extends AbstractRipper {
/**
 * Returns the current value of {@code albumType}.
 *
 * @return the {@code ALBUM_TYPE} held in {@code albumType}
 */
public ALBUM_TYPE getAlbumType() {
return albumType;
}
}
/**
 * A single image of an imgur album together with the metadata used to build
 * the file name it is saved under.
 *
 * <p>The image id and the default extension are derived from the last
 * segment of the URL's <em>path</em>, so a query string or fragment
 * (e.g. {@code .jpg?1}) never leaks into the extension, and a URL whose last
 * path segment has no {@code '.'} yields an empty extension instead of
 * throwing.
 */
public static class ImgurImage {
    /** Title and description of the image (may be empty) and the file extension including the leading dot. */
    public String title = "",
                  description = "",
                  extension = "";
    /** Location the image is downloaded from. */
    public URL url = null;

    /**
     * Creates an image with an empty title and description.
     *
     * @param url location of the image; its path determines {@link #extension}
     */
    public ImgurImage(URL url) {
        this.url = url;
        this.extension = extensionOf(fileNameOf(url));
    }

    /**
     * Creates an image with a title and an empty description.
     *
     * @param url   location of the image
     * @param title title of the image
     */
    public ImgurImage(URL url, String title) {
        this(url);
        this.title = title;
    }

    /**
     * Creates an image with a title and a description.
     *
     * @param url         location of the image
     * @param title       title of the image
     * @param description description of the image
     */
    public ImgurImage(URL url, String title, String description) {
        this(url, title);
        this.description = description;
    }

    /**
     * Builds the file name for this image: {@code <title>_<id><extension>},
     * or {@code <id><extension>} when the title is null or empty. The part
     * before the extension is passed through {@code Utils.filesystemSafe}.
     *
     * @return the file name to save this image as
     */
    public String getSaveAs() {
        String imgId = baseNameOf(fileNameOf(url));
        String saveAs;
        if (this.title == null || this.title.equals("")) {
            saveAs = imgId;
        } else {
            saveAs = this.title + "_" + imgId;
        }
        saveAs = Utils.filesystemSafe(saveAs);
        return saveAs + this.extension;
    }

    /** Returns the last segment of the URL's path, without query string or fragment. */
    private static String fileNameOf(URL url) {
        String path = url.getPath();
        return path.substring(path.lastIndexOf('/') + 1);
    }

    /** Returns the extension of a file name including the leading dot, or "" if there is none. */
    private static String extensionOf(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot >= 0 ? fileName.substring(dot) : "";
    }

    /** Returns a file name with its extension (if any) removed. */
    private static String baseNameOf(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot >= 0 ? fileName.substring(0, dot) : fileName;
    }
}
/**
 * An imgur album: the URL it was loaded from, an optional title, and the
 * images collected for it in insertion order.
 */
public static class ImgurAlbum {
    /** Images added to this album so far. */
    public List<ImgurImage> images = new ArrayList<ImgurImage>();
    /** Album title; stays {@code null} when none was supplied. */
    public String title;
    /** URL of the album. */
    public URL url;

    /**
     * Creates an album without a title.
     *
     * @param url URL of the album
     */
    public ImgurAlbum(URL url) {
        this(url, null);
    }

    /**
     * Creates an album with a title.
     *
     * @param url   URL of the album
     * @param title title of the album
     */
    public ImgurAlbum(URL url, String title) {
        this.url = url;
        this.title = title;
    }

    /**
     * Appends an image to this album.
     *
     * @param image the image to append
     */
    public void addImage(ImgurImage image) {
        this.images.add(image);
    }
}
}

View File

@ -11,6 +11,8 @@ import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum;
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage;
public class RipUtils {
private static final Logger logger = Logger.getLogger(RipUtils.class);
@ -22,7 +24,11 @@ public class RipUtils {
if ((url.getHost().equals("m.imgur.com") || url.getHost().equals("imgur.com"))
&& url.toExternalForm().contains("imgur.com/a/")) {
try {
return ImgurRipper.getURLsFromAlbum(url);
ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url);
for (ImgurImage imgurImage : imgurAlbum.images) {
result.add(imgurImage.url);
}
return result;
} catch (IOException e) {
logger.error("[!] Exception while loading album " + url, e);
}

View File

@ -51,7 +51,6 @@ public class ImgurRipperTest extends RippersTest {
try {
ImgurRipper ripper = new ImgurRipper(url);
assert(ripper.canRip(url));
System.err.println(ripper.getWorkingDir());
deleteDir(ripper.getWorkingDir());
} catch (Exception e) {
fail("Failed to instantiate ripper for " + url);
@ -60,11 +59,12 @@ public class ImgurRipperTest extends RippersTest {
}
public void testImgurAlbums() throws IOException {
if (!DOWNLOAD_CONTENT) {
if (false && !DOWNLOAD_CONTENT) {
return;
}
List<URL> contentURLs = new ArrayList<URL>();
// URLs that should return more than 1 image
/*
contentURLs.add(new URL("http://imgur.com/a/hqJIu")); // Vertical layout
contentURLs.add(new URL("http://imgur.com/a/dS9OQ#0")); // Horizontal layout
contentURLs.add(new URL("http://imgur.com/a/YpsW9#0")); // Grid layout
@ -72,6 +72,8 @@ public class ImgurRipperTest extends RippersTest {
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
*/
contentURLs.add(new URL("http://imgur.com/a/bXQpH"));
for (URL url : contentURLs) {
try {
ImgurRipper ripper = new ImgurRipper(url);