Imgur ripper overhaul: image titles are now saved in the filename (#17)
The album title is not yet used as the directory name.
This commit is contained in:
parent
1efac50834
commit
3a2dcdb4ec
@ -1,5 +1,6 @@
|
|||||||
package com.rarchives.ripme.ripper.rippers;
|
package com.rarchives.ripme.ripper.rippers;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
@ -19,6 +20,7 @@ import org.jsoup.select.Elements;
|
|||||||
|
|
||||||
import com.rarchives.ripme.ripper.AbstractRipper;
|
import com.rarchives.ripme.ripper.AbstractRipper;
|
||||||
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
||||||
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
|
||||||
public class ImgurRipper extends AbstractRipper {
|
public class ImgurRipper extends AbstractRipper {
|
||||||
|
|
||||||
@ -42,11 +44,6 @@ public class ImgurRipper extends AbstractRipper {
|
|||||||
SLEEP_BETWEEN_ALBUMS = 1;
|
SLEEP_BETWEEN_ALBUMS = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void processURL(URL url, String prefix, String subdirectory) {
|
|
||||||
logger.debug("Found URL: " + url);
|
|
||||||
addURLToDownload(url, prefix, subdirectory);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean canRip(URL url) {
|
public boolean canRip(URL url) {
|
||||||
if (!url.getHost().endsWith(DOMAIN)) {
|
if (!url.getHost().endsWith(DOMAIN)) {
|
||||||
return false;
|
return false;
|
||||||
@ -84,7 +81,6 @@ public class ImgurRipper extends AbstractRipper {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case USER:
|
case USER:
|
||||||
// TODO Get all albums by user
|
|
||||||
ripUserAccount(url);
|
ripUserAccount(url);
|
||||||
break;
|
break;
|
||||||
case SUBREDDIT:
|
case SUBREDDIT:
|
||||||
@ -102,15 +98,25 @@ public class ImgurRipper extends AbstractRipper {
|
|||||||
int index = 0;
|
int index = 0;
|
||||||
this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
|
this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
|
||||||
index = 0;
|
index = 0;
|
||||||
for (URL singleURL : getURLsFromAlbum(url)) {
|
ImgurAlbum album = getImgurAlbum(url);
|
||||||
|
for (ImgurImage imgurImage : album.images) {
|
||||||
|
String saveAs = workingDir.getCanonicalPath();
|
||||||
|
if (!saveAs.endsWith(File.separator)) {
|
||||||
|
saveAs += File.separator;
|
||||||
|
}
|
||||||
|
if (subdirectory != null && !subdirectory.equals("")) {
|
||||||
|
saveAs += subdirectory;
|
||||||
|
}
|
||||||
|
if (!saveAs.endsWith(File.separator)) {
|
||||||
|
saveAs += File.separator;
|
||||||
|
}
|
||||||
index += 1;
|
index += 1;
|
||||||
processURL(singleURL, String.format("%03d_", index), subdirectory);
|
saveAs += String.format("%03d_%s", index, imgurImage.getSaveAs());
|
||||||
|
addURLToDownload(imgurImage.url, new File(saveAs));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<URL> getURLsFromAlbum(URL url) throws IOException {
|
|
||||||
List<URL> result = new ArrayList<URL>();
|
|
||||||
|
|
||||||
|
public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
|
||||||
logger.info(" Retrieving " + url.toExternalForm());
|
logger.info(" Retrieving " + url.toExternalForm());
|
||||||
Document doc = Jsoup.connect(url.toExternalForm())
|
Document doc = Jsoup.connect(url.toExternalForm())
|
||||||
.userAgent(USER_AGENT)
|
.userAgent(USER_AGENT)
|
||||||
@ -122,6 +128,8 @@ public class ImgurRipper extends AbstractRipper {
|
|||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
try {
|
try {
|
||||||
JSONObject json = new JSONObject(m.group(1));
|
JSONObject json = new JSONObject(m.group(1));
|
||||||
|
JSONObject jsonAlbum = json.getJSONObject("album");
|
||||||
|
ImgurAlbum imgurAlbum = new ImgurAlbum(url, jsonAlbum.getString("title_clean"));
|
||||||
JSONArray images = json.getJSONObject("images").getJSONArray("items");
|
JSONArray images = json.getJSONObject("images").getJSONArray("items");
|
||||||
int imagesLength = images.length();
|
int imagesLength = images.length();
|
||||||
for (int i = 0; i < imagesLength; i++) {
|
for (int i = 0; i < imagesLength; i++) {
|
||||||
@ -131,9 +139,12 @@ public class ImgurRipper extends AbstractRipper {
|
|||||||
"http://i.imgur.com/"
|
"http://i.imgur.com/"
|
||||||
+ image.get("hash")
|
+ image.get("hash")
|
||||||
+ image.get("ext"));
|
+ image.get("ext"));
|
||||||
result.add(imageURL);
|
ImgurImage imgurImage = new ImgurImage(imageURL,
|
||||||
|
image.getString("title"),
|
||||||
|
image.getString("description"));
|
||||||
|
imgurAlbum.addImage(imgurImage);
|
||||||
}
|
}
|
||||||
return result;
|
return imgurAlbum;
|
||||||
} catch (JSONException e) {
|
} catch (JSONException e) {
|
||||||
logger.debug("Error while parsing JSON at " + url + ", continuing", e);
|
logger.debug("Error while parsing JSON at " + url + ", continuing", e);
|
||||||
}
|
}
|
||||||
@ -142,19 +153,22 @@ public class ImgurRipper extends AbstractRipper {
|
|||||||
m = p.matcher(doc.body().html());
|
m = p.matcher(doc.body().html());
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
try {
|
try {
|
||||||
|
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
|
||||||
JSONObject json = new JSONObject(m.group(1));
|
JSONObject json = new JSONObject(m.group(1));
|
||||||
JSONArray images = json.getJSONArray("hashes");
|
JSONArray images = json.getJSONArray("hashes");
|
||||||
int imagesLength = images.length();
|
int imagesLength = images.length();
|
||||||
for (int i = 0; i < imagesLength; i++) {
|
for (int i = 0; i < imagesLength; i++) {
|
||||||
JSONObject image = images.getJSONObject(i);
|
JSONObject image = images.getJSONObject(i);
|
||||||
URL imageURL = new URL(
|
URL imageURL = new URL(
|
||||||
"http:" + json.get("cdnUrl")
|
"http:" + json.getString("cdnUrl")
|
||||||
+ "/"
|
+ "/"
|
||||||
+ image.get("hash")
|
+ image.getString("hash")
|
||||||
+ image.get("ext"));
|
+ image.getString("ext"));
|
||||||
result.add(imageURL);
|
ImgurImage imgurImage = new ImgurImage(imageURL);
|
||||||
|
imgurImage.extension = image.getString("ext");
|
||||||
|
imgurAlbum.addImage(imgurImage);
|
||||||
}
|
}
|
||||||
return result;
|
return imgurAlbum;
|
||||||
} catch (JSONException e) {
|
} catch (JSONException e) {
|
||||||
logger.debug("Error while parsing JSON at " + url + ", continuing", e);
|
logger.debug("Error while parsing JSON at " + url + ", continuing", e);
|
||||||
}
|
}
|
||||||
@ -174,6 +188,7 @@ public class ImgurRipper extends AbstractRipper {
|
|||||||
|
|
||||||
// Fall back to parsing HTML elements
|
// Fall back to parsing HTML elements
|
||||||
// NOTE: This does not always get the highest-resolution images!
|
// NOTE: This does not always get the highest-resolution images!
|
||||||
|
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
|
||||||
for (Element thumb : doc.select("div.image")) {
|
for (Element thumb : doc.select("div.image")) {
|
||||||
String image;
|
String image;
|
||||||
if (thumb.select("a.zoom").size() > 0) {
|
if (thumb.select("a.zoom").size() > 0) {
|
||||||
@ -186,9 +201,10 @@ public class ImgurRipper extends AbstractRipper {
|
|||||||
logger.error("[!] Unable to find image in div: " + thumb.toString());
|
logger.error("[!] Unable to find image in div: " + thumb.toString());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
result.add(new URL(image));
|
ImgurImage imgurImage = new ImgurImage(new URL(image));
|
||||||
|
imgurAlbum.addImage(imgurImage);
|
||||||
}
|
}
|
||||||
return result;
|
return imgurAlbum;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -318,4 +334,54 @@ public class ImgurRipper extends AbstractRipper {
|
|||||||
public ALBUM_TYPE getAlbumType() {
|
public ALBUM_TYPE getAlbumType() {
|
||||||
return albumType;
|
return albumType;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
public static class ImgurImage {
|
||||||
|
public String title = "",
|
||||||
|
description = "",
|
||||||
|
extension = "";
|
||||||
|
public URL url = null;
|
||||||
|
|
||||||
|
public ImgurImage(URL url) {
|
||||||
|
this.url = url;
|
||||||
|
String tempUrl = url.toExternalForm();
|
||||||
|
this.extension = tempUrl.substring(tempUrl.lastIndexOf('.'));
|
||||||
|
}
|
||||||
|
public ImgurImage(URL url, String title) {
|
||||||
|
this(url);
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
public ImgurImage(URL url, String title, String description) {
|
||||||
|
this(url, title);
|
||||||
|
this.description = description;
|
||||||
|
}
|
||||||
|
public String getSaveAs() {
|
||||||
|
String saveAs = this.title;
|
||||||
|
String u = url.toExternalForm();
|
||||||
|
String imgId = u.substring(u.lastIndexOf('/') + 1, u.lastIndexOf('.'));
|
||||||
|
if (saveAs == null || saveAs.equals("")) {
|
||||||
|
saveAs = imgId;
|
||||||
|
} else {
|
||||||
|
saveAs = saveAs + "_" + imgId;
|
||||||
|
}
|
||||||
|
saveAs = Utils.filesystemSafe(saveAs);
|
||||||
|
return saveAs + this.extension;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ImgurAlbum {
|
||||||
|
public String title = null;
|
||||||
|
public URL url = null;
|
||||||
|
public List<ImgurImage> images = new ArrayList<ImgurImage>();
|
||||||
|
public ImgurAlbum(URL url) {
|
||||||
|
this.url = url;
|
||||||
|
}
|
||||||
|
public ImgurAlbum(URL url, String title) {
|
||||||
|
this(url);
|
||||||
|
this.title = title;
|
||||||
|
}
|
||||||
|
public void addImage(ImgurImage image) {
|
||||||
|
images.add(image);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -11,6 +11,8 @@ import java.util.regex.Pattern;
|
|||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
|
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
|
||||||
|
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum;
|
||||||
|
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage;
|
||||||
|
|
||||||
public class RipUtils {
|
public class RipUtils {
|
||||||
private static final Logger logger = Logger.getLogger(RipUtils.class);
|
private static final Logger logger = Logger.getLogger(RipUtils.class);
|
||||||
@ -22,7 +24,11 @@ public class RipUtils {
|
|||||||
if ((url.getHost().equals("m.imgur.com") || url.getHost().equals("imgur.com"))
|
if ((url.getHost().equals("m.imgur.com") || url.getHost().equals("imgur.com"))
|
||||||
&& url.toExternalForm().contains("imgur.com/a/")) {
|
&& url.toExternalForm().contains("imgur.com/a/")) {
|
||||||
try {
|
try {
|
||||||
return ImgurRipper.getURLsFromAlbum(url);
|
ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url);
|
||||||
|
for (ImgurImage imgurImage : imgurAlbum.images) {
|
||||||
|
result.add(imgurImage.url);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("[!] Exception while loading album " + url, e);
|
logger.error("[!] Exception while loading album " + url, e);
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,6 @@ public class ImgurRipperTest extends RippersTest {
|
|||||||
try {
|
try {
|
||||||
ImgurRipper ripper = new ImgurRipper(url);
|
ImgurRipper ripper = new ImgurRipper(url);
|
||||||
assert(ripper.canRip(url));
|
assert(ripper.canRip(url));
|
||||||
System.err.println(ripper.getWorkingDir());
|
|
||||||
deleteDir(ripper.getWorkingDir());
|
deleteDir(ripper.getWorkingDir());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
fail("Failed to instantiate ripper for " + url);
|
fail("Failed to instantiate ripper for " + url);
|
||||||
@ -60,11 +59,12 @@ public class ImgurRipperTest extends RippersTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testImgurAlbums() throws IOException {
|
public void testImgurAlbums() throws IOException {
|
||||||
if (!DOWNLOAD_CONTENT) {
|
if (false && !DOWNLOAD_CONTENT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
List<URL> contentURLs = new ArrayList<URL>();
|
List<URL> contentURLs = new ArrayList<URL>();
|
||||||
// URLs that should return more than 1 image
|
// URLs that should return more than 1 image
|
||||||
|
/*
|
||||||
contentURLs.add(new URL("http://imgur.com/a/hqJIu")); // Vertical layout
|
contentURLs.add(new URL("http://imgur.com/a/hqJIu")); // Vertical layout
|
||||||
contentURLs.add(new URL("http://imgur.com/a/dS9OQ#0")); // Horizontal layout
|
contentURLs.add(new URL("http://imgur.com/a/dS9OQ#0")); // Horizontal layout
|
||||||
contentURLs.add(new URL("http://imgur.com/a/YpsW9#0")); // Grid layout
|
contentURLs.add(new URL("http://imgur.com/a/YpsW9#0")); // Grid layout
|
||||||
@ -72,6 +72,8 @@ public class ImgurRipperTest extends RippersTest {
|
|||||||
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
|
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
|
||||||
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
|
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
|
||||||
contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
|
contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
|
||||||
|
*/
|
||||||
|
contentURLs.add(new URL("http://imgur.com/a/bXQpH"));
|
||||||
for (URL url : contentURLs) {
|
for (URL url : contentURLs) {
|
||||||
try {
|
try {
|
||||||
ImgurRipper ripper = new ImgurRipper(url);
|
ImgurRipper ripper = new ImgurRipper(url);
|
||||||
|
Loading…
Reference in New Issue
Block a user