From 3a2dcdb4ec1efbe37d5b4d8c70f197e592d1b37e Mon Sep 17 00:00:00 2001 From: 4pr0n Date: Sat, 12 Apr 2014 16:53:49 -0700 Subject: [PATCH] Imgur ripper overhaul, image titles saved to filename #17 Still no album title = directory yet --- .../ripme/ripper/rippers/ImgurRipper.java | 108 ++++++++++++++---- .../com/rarchives/ripme/utils/RipUtils.java | 8 +- .../tst/ripper/rippers/ImgurRipperTest.java | 6 +- 3 files changed, 98 insertions(+), 24 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java index f90b7de9..90061bf0 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java @@ -1,5 +1,6 @@ package com.rarchives.ripme.ripper.rippers; +import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; @@ -19,6 +20,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AbstractRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Utils; public class ImgurRipper extends AbstractRipper { @@ -42,11 +44,6 @@ public class ImgurRipper extends AbstractRipper { SLEEP_BETWEEN_ALBUMS = 1; } - public void processURL(URL url, String prefix, String subdirectory) { - logger.debug("Found URL: " + url); - addURLToDownload(url, prefix, subdirectory); - } - public boolean canRip(URL url) { if (!url.getHost().endsWith(DOMAIN)) { return false; @@ -84,7 +81,6 @@ public class ImgurRipper extends AbstractRipper { break; case USER: - // TODO Get all albums by user ripUserAccount(url); break; case SUBREDDIT: @@ -102,15 +98,25 @@ public class ImgurRipper extends AbstractRipper { int index = 0; this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); index = 0; - for (URL singleURL : getURLsFromAlbum(url)) { + ImgurAlbum album = getImgurAlbum(url); + for (ImgurImage imgurImage : album.images) { + String saveAs = workingDir.getCanonicalPath(); + if (!saveAs.endsWith(File.separator)) { + saveAs += File.separator; + } + if (subdirectory != null && !subdirectory.equals("")) { + saveAs += subdirectory; + } + if (!saveAs.endsWith(File.separator)) { + saveAs += File.separator; + } index += 1; - processURL(singleURL, String.format("%03d_", index), subdirectory); + saveAs += String.format("%03d_%s", index, imgurImage.getSaveAs()); + addURLToDownload(imgurImage.url, new File(saveAs)); } } - - public static List getURLsFromAlbum(URL url) throws IOException { - List result = new ArrayList(); + public static ImgurAlbum getImgurAlbum(URL url) throws IOException { logger.info(" Retrieving " + url.toExternalForm()); Document doc = Jsoup.connect(url.toExternalForm()) .userAgent(USER_AGENT) @@ -122,6 +128,8 @@ public class ImgurRipper extends AbstractRipper { if (m.matches()) { try { JSONObject json = new JSONObject(m.group(1)); + JSONObject jsonAlbum = json.getJSONObject("album"); + ImgurAlbum imgurAlbum = new ImgurAlbum(url, jsonAlbum.getString("title_clean")); JSONArray images = json.getJSONObject("images").getJSONArray("items"); int imagesLength = images.length(); for (int i = 0; i < imagesLength; i++) { @@ -131,9 +139,12 @@ public class ImgurRipper extends AbstractRipper { "http://i.imgur.com/" + image.get("hash") + image.get("ext")); - result.add(imageURL); + ImgurImage imgurImage = new ImgurImage(imageURL, + image.getString("title"), + image.getString("description")); + imgurAlbum.addImage(imgurImage); } - return result; + return imgurAlbum; } catch (JSONException e) { logger.debug("Error while parsing JSON at " + url + ", continuing", e); } @@ -142,19 +153,22 @@ public class ImgurRipper extends AbstractRipper { m = p.matcher(doc.body().html()); if (m.matches()) { try { + ImgurAlbum imgurAlbum = new ImgurAlbum(url); JSONObject json = new JSONObject(m.group(1)); JSONArray images = json.getJSONArray("hashes"); int imagesLength = images.length(); for (int i = 0; i < imagesLength; i++) { JSONObject image = images.getJSONObject(i); URL imageURL = new URL( - "http:" + json.get("cdnUrl") + "http:" + json.getString("cdnUrl") + "/" - + image.get("hash") - + image.get("ext")); - result.add(imageURL); + + image.getString("hash") + + image.getString("ext")); + ImgurImage imgurImage = new ImgurImage(imageURL); + imgurImage.extension = image.getString("ext"); + imgurAlbum.addImage(imgurImage); } - return result; + return imgurAlbum; } catch (JSONException e) { logger.debug("Error while parsing JSON at " + url + ", continuing", e); } @@ -174,6 +188,7 @@ public class ImgurRipper extends AbstractRipper { // Fall back to parsing HTML elements // NOTE: This does not always get the highest-resolution images! + ImgurAlbum imgurAlbum = new ImgurAlbum(url); for (Element thumb : doc.select("div.image")) { String image; if (thumb.select("a.zoom").size() > 0) { @@ -186,9 +201,10 @@ public class ImgurRipper extends AbstractRipper { logger.error("[!] Unable to find image in div: " + thumb.toString()); continue; } - result.add(new URL(image)); + ImgurImage imgurImage = new ImgurImage(new URL(image)); + imgurAlbum.addImage(imgurImage); } - return result; + return imgurAlbum; } /** @@ -318,4 +334,54 @@ public class ImgurRipper extends AbstractRipper { public ALBUM_TYPE getAlbumType() { return albumType; } -} + + public static class ImgurImage { + public String title = "", + description = "", + extension = ""; + public URL url = null; + + public ImgurImage(URL url) { + this.url = url; + String tempUrl = url.toExternalForm(); + this.extension = tempUrl.substring(tempUrl.lastIndexOf('.')); + } + public ImgurImage(URL url, String title) { + this(url); + this.title = title; + } + public ImgurImage(URL url, String title, String description) { + this(url, title); + this.description = description; + } + public String getSaveAs() { + String saveAs = this.title; + String u = url.toExternalForm(); + String imgId = u.substring(u.lastIndexOf('/') + 1, u.lastIndexOf('.')); + if (saveAs == null || saveAs.equals("")) { + saveAs = imgId; + } else { + saveAs = saveAs + "_" + imgId; + } + saveAs = Utils.filesystemSafe(saveAs); + return saveAs + this.extension; + } + } + + public static class ImgurAlbum { + public String title = null; + public URL url = null; + public List images = new ArrayList(); + public ImgurAlbum(URL url) { + this.url = url; + } + public ImgurAlbum(URL url, String title) { + this(url); + this.title = title; + } + public void addImage(ImgurImage image) { + images.add(image); + } + } + +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java index 1bb8a64d..b77f13f2 100644 --- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java +++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java @@ -11,6 +11,8 @@ import java.util.regex.Pattern; import org.apache.log4j.Logger; import com.rarchives.ripme.ripper.rippers.ImgurRipper; +import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; +import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage; public class RipUtils { private static final Logger logger = Logger.getLogger(RipUtils.class); @@ -22,7 +24,11 @@ public class RipUtils { if ((url.getHost().equals("m.imgur.com") || url.getHost().equals("imgur.com")) && url.toExternalForm().contains("imgur.com/a/")) { try { - return ImgurRipper.getURLsFromAlbum(url); + ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url); + for (ImgurImage imgurImage : imgurAlbum.images) { + result.add(imgurImage.url); + } + return result; } catch (IOException e) { logger.error("[!] Exception while loading album " + url, e); } diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java index f64028c1..f957cba1 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java @@ -51,7 +51,6 @@ public class ImgurRipperTest extends RippersTest { try { ImgurRipper ripper = new ImgurRipper(url); assert(ripper.canRip(url)); - System.err.println(ripper.getWorkingDir()); deleteDir(ripper.getWorkingDir()); } catch (Exception e) { fail("Failed to instantiate ripper for " + url); @@ -60,11 +59,12 @@ public class ImgurRipperTest extends RippersTest { } public void testImgurAlbums() throws IOException { - if (!DOWNLOAD_CONTENT) { + if (false && !DOWNLOAD_CONTENT) { return; } List contentURLs = new ArrayList(); // URLs that should return more than 1 image + /* contentURLs.add(new URL("http://imgur.com/a/hqJIu")); // Vertical layout contentURLs.add(new URL("http://imgur.com/a/dS9OQ#0")); // Horizontal layout contentURLs.add(new URL("http://imgur.com/a/YpsW9#0")); // Grid layout @@ -72,6 +72,8 @@ public class ImgurRipperTest extends RippersTest { contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0")); contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0")); contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all")); + */ + contentURLs.add(new URL("http://imgur.com/a/bXQpH")); for (URL url : contentURLs) { try { ImgurRipper ripper = new ImgurRipper(url);