From 04ce12b546fe775101cc880cb072c3c284014626 Mon Sep 17 00:00:00 2001
From: 4pr0n <admin@rarchives.com>
Date: Fri, 28 Feb 2014 03:04:03 -0800
Subject: [PATCH] More imgur ripping support. Better logging.

Adds support for ripping imgur account pages and series of imgur images
(separated by commas in the URL). Also retrieves the highest-resolution
version of imgur images.
Logging is less verbose and more human-readable.
---
 .../ripme/ripper/AbstractRipper.java          |  71 ++++++++----
 .../ripme/ripper/DownloadFileThread.java      |  10 +-
 .../ripme/ripper/DownloadThreadPool.java      |   1 -
 .../ripme/ripper/rippers/ImgurRipper.java     | 106 ++++++++++++++++--
 .../java/com/rarchives/ripme/utils/Utils.java |  23 ++++
 5 files changed, 178 insertions(+), 33 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index d398d187..336f6256 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -42,28 +42,32 @@ public abstract class AbstractRipper implements RipperInterface {
         this.threadPool = new DownloadThreadPool();
     }
 
+    /**
+     * Queues image to be downloaded and saved.
+     * Uses filename from URL to decide filename.
+     * @param url
+     *      URL to download
+     */
     public void addURLToDownload(URL url) {
-        addURLToDownload(url, "");
+        // Use empty prefix and empty subdirectory
+        addURLToDownload(url, "", "");
     }
 
-    public void addURLToDownload(URL url, String prefix) {
-        String saveAs = url.toExternalForm();
-        saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
-        if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
-        if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
-        if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
-        File saveFileAs;
-        try {
-            saveFileAs = new File(workingDir.getCanonicalPath() + File.separator + prefix + saveAs);
-        } catch (IOException e) {
-            logger.error("[!] Error creating save file path for URL '" + url + "':", e);
-            return;
-        }
-        logger.debug("Downloading " + url + " to " + saveFileAs);
-        addURLToDownload(url, saveFileAs);
-    }
     /**
-     * Add image to be downloaded and saved.
+     * Queues image to be downloaded and saved.
+     * Uses filename from URL (and 'prefix') to decide filename.
+     * @param url
+     *      URL to download
+     * @param prefix
+     *      Text to prepend to saved filename.
+     */
+    public void addURLToDownload(URL url, String prefix) {
+        // Use empty subdirectory
+        addURLToDownload(url, prefix, "");
+    }
+
+    /**
+     * Queues image to be downloaded and saved.
      * @param url
      *      URL of the file
      * @param saveAs
@@ -73,6 +77,35 @@ public abstract class AbstractRipper implements RipperInterface {
         threadPool.addThread(new DownloadFileThread(url, saveAs));
     }
 
+    /**
+     * Queues image to be downloaded and saved as 'prefix' + filename from URL.
+     * @param subdirectory
+     *      Directory under the working directory to save into (empty or null for none).
+     */
+    public void addURLToDownload(URL url, String prefix, String subdirectory) {
+        String saveAs = url.toExternalForm();
+        saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
+        if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
+        if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
+        if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
+        File saveFileAs;
+        try {
+            File saveDir = workingDir.getCanonicalFile();
+            if (subdirectory != null && !subdirectory.equals("")) { saveDir = new File(saveDir, subdirectory); }
+            saveFileAs = new File(saveDir, prefix + saveAs);
+        } catch (IOException e) {
+            logger.error("[!] Error creating save file path for URL '" + url + "':", e);
+            return;
+        }
+        logger.debug("Downloading " + url + " to " + saveFileAs);
+        File parent = saveFileAs.getParentFile();
+        if (!parent.exists()) {
+            logger.info("[+] Creating directory: " + Utils.removeCWD(parent));
+            if (!parent.mkdirs() && !parent.isDirectory()) { logger.error("[!] Unable to create directory: " + parent); }
+        }
+        addURLToDownload(url, saveFileAs);
+    }
+
     public URL getURL() {
         return url;
     }
@@ -85,7 +118,7 @@ public abstract class AbstractRipper implements RipperInterface {
         path += getHost() + "_" + getGID(this.url) + File.separator;
         this.workingDir = new File(path);
         if (!this.workingDir.exists()) {
-            logger.info("[+] Creating working directory: " + this.workingDir);
+            logger.info("[+] Creating directory: " + Utils.removeCWD(this.workingDir));
             this.workingDir.mkdirs();
         }
         logger.debug("Set working directory to: " + this.workingDir);
diff --git a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
index 820a12b3..8ab3654f 100644
--- a/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
+++ b/src/main/java/com/rarchives/ripme/ripper/DownloadFileThread.java
@@ -17,12 +17,14 @@ public class DownloadFileThread extends Thread {
 
     private URL url;
     private File saveAs;
+    private String prettySaveAs;
     private int retries;
 
     public DownloadFileThread(URL url, File saveAs) {
         super();
         this.url = url;
         this.saveAs = saveAs;
+        this.prettySaveAs = Utils.removeCWD(saveAs);
         this.retries = Utils.getConfigInteger("download.retries", 1);
     }
 
@@ -30,10 +32,10 @@ public class DownloadFileThread extends Thread {
         // Check if file already exists
         if (saveAs.exists()) {
             if (Utils.getConfigBoolean("file.overwrite", false)) {
-                logger.info("[!] File already exists and 'file.overwrite' is true, deleting: " + saveAs);
+                logger.info("[!] File already exists and 'file.overwrite' is true, deleting: " + prettySaveAs);
                 saveAs.delete();
             } else {
-                logger.info("[!] Not downloading " + url + " because file already exists: " + saveAs);
+                logger.info("[!] Skipping " + url + " -- file already exists: " + prettySaveAs);
                 return;
             }
         }
@@ -41,7 +43,7 @@ public class DownloadFileThread extends Thread {
         int tries = 0; // Number of attempts to download
         do {
             try {
-                logger.info("[ ] Downloading file from: " + url + (tries > 0 ? " Retry #" + tries : ""));
+                logger.info("    Downloading file: " + url + (tries > 0 ? " Retry #" + tries : ""));
                 tries += 1;
                 Response response;
                 response = Jsoup.connect(url.toExternalForm())
@@ -59,7 +61,7 @@ public class DownloadFileThread extends Thread {
                 return;
             }
         } while (true);
-        logger.info("[+] Download completed: " + url);
+        logger.info("[+] Saved " + url + " as " + this.prettySaveAs);
     }
 
 }
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/DownloadThreadPool.java b/src/main/java/com/rarchives/ripme/ripper/DownloadThreadPool.java
index 1c5a0ac6..e0a93404 100644
--- a/src/main/java/com/rarchives/ripme/ripper/DownloadThreadPool.java
+++ b/src/main/java/com/rarchives/ripme/ripper/DownloadThreadPool.java
@@ -24,7 +24,6 @@ public class DownloadThreadPool {
     }
     
     public void waitForThreads() {
-        logger.info("[ ] Waiting for threads to finish...");
         threadPool.shutdown();
         try {
             threadPool.awaitTermination(60, TimeUnit.SECONDS);
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
index 2e8a5406..65ebd7a4 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
@@ -7,6 +7,9 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.log4j.Logger;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
@@ -19,6 +22,8 @@ public class ImgurRipper extends AbstractRipper {
                                 HOST   = "imgur";
     private static final Logger logger = Logger.getLogger(ImgurRipper.class);
     
+    private final int SLEEP_BETWEEN_ALBUMS;
+    
     static enum ALBUM_TYPE {
         ALBUM,
         USER,
@@ -29,11 +34,12 @@ public class ImgurRipper extends AbstractRipper {
 
     public ImgurRipper(URL url) throws IOException {
         super(url);
+        SLEEP_BETWEEN_ALBUMS = 1;
     }
 
-    public void processURL(URL url, String prefix) {
+    public void processURL(URL url, String prefix, String subdirectory) {
        logger.debug("Found URL: " + url);
-       addURLToDownload(url, prefix);
+       addURLToDownload(url, prefix, subdirectory);
     }
 
     public boolean canRip(URL url) {
@@ -61,27 +67,83 @@ public class ImgurRipper extends AbstractRipper {
     public void rip() throws IOException {
         switch (albumType) {
         case ALBUM:
-            this.url = new URL(this.url.toExternalForm() + "/noscript");
+            this.url = new URL(this.url.toExternalForm());
             // Fall-through
         case USER_ALBUM:
             ripAlbum(this.url);
             break;
 
         case SERIES_OF_IMAGES:
-            // TODO Get all images
+            ripAlbum(this.url);
             break;
 
         case USER:
             // TODO Get all albums by user
+            ripUserAccount(url);
             break;
         }
         threadPool.waitForThreads();
     }
 
     private void ripAlbum(URL url) throws IOException {
+        ripAlbum(url, "");
+    }
+
+    private void ripAlbum(URL url, String subdirectory) throws IOException {
         int index = 0;
-        logger.info("[ ] Retrieving " + url.toExternalForm());
+        logger.info("    Retrieving " + url.toExternalForm());
         Document doc = Jsoup.connect(url.toExternalForm()).get();
+
+        // Try to use embedded JSON to retrieve images
+        Pattern p = Pattern.compile("^.*Imgur\\.Album\\.getInstance\\((.*)\\);.*$", Pattern.DOTALL);
+        Matcher m = p.matcher(doc.body().html());
+        if (m.matches()) {
+            try {
+                JSONObject json = new JSONObject(m.group(1));
+                JSONArray images = json.getJSONObject("images").getJSONArray("items");
+                int imagesLength = images.length();
+                for (int i = 0; i < imagesLength; i++) {
+                    JSONObject image = images.getJSONObject(i);
+                    URL imageURL = new URL(
+                            // CDN url is provided elsewhere in the document
+                            "http://i.imgur.com/"
+                                    + image.get("hash")
+                                    + image.get("ext"));
+                    index += 1;
+                    processURL(imageURL, String.format("%03d_", index), subdirectory);
+                }
+                return;
+            } catch (JSONException e) {
+                logger.debug("Error while parsing JSON at " + url + ", continuing", e);
+            }
+        }
+        p = Pattern.compile("^.*= new ImgurShare\\((.*)\\);.*$", Pattern.DOTALL);
+        m = p.matcher(doc.body().html());
+        if (m.matches()) {
+            try {
+                JSONObject json = new JSONObject(m.group(1));
+                JSONArray images = json.getJSONArray("hashes");
+                int imagesLength = images.length();
+                for (int i = 0; i < imagesLength; i++) {
+                    JSONObject image = images.getJSONObject(i);
+                    URL imageURL = new URL(
+                            "http:" + json.get("cdnUrl")
+                                    + "/"
+                                    + image.get("hash")
+                                    + image.get("ext"));
+                    index += 1;
+                    processURL(imageURL, String.format("%03d_", index), subdirectory);
+                }
+                return;
+            } catch (JSONException e) {
+                logger.debug("Error while parsing JSON at " + url + ", continuing", e);
+            }
+        }
+
+        logger.info("[!] Falling back to elemental retrieval method");
+
+        // Fall back to parsing HTML elements
+        // NOTE: This does not always get the highest-resolution images!
         for (Element thumb : doc.select("div.image")) {
             String image;
             if (thumb.select("a.zoom").size() > 0) {
@@ -95,7 +157,33 @@ public class ImgurRipper extends AbstractRipper {
                 continue;
             }
             index += 1;
-            processURL(new URL(image), String.format("%03d_", index));
+            processURL(new URL(image), String.format("%03d_", index), subdirectory);
+        }
+    }
+    
+    /**
+     * Rips all albums in an imgur user's account; stops early if interrupted.
+     * @param url
+     *      URL to imgur user account (http://username.imgur.com)
+     * @throws IOException if the account page cannot be retrieved
+     */
+    private void ripUserAccount(URL url) throws IOException {
+        logger.info("    Retrieving " + url.toExternalForm());
+        Document doc = Jsoup.connect(url.toExternalForm()).get();
+        for (Element album : doc.select("div.cover a")) {
+            String href = album.attr("href"); // Empty string when the attribute is absent
+            if (!href.contains("imgur.com/a/")) { continue; }
+            String albumID = href.substring(href.lastIndexOf('/') + 1);
+            URL albumURL = new URL("http:" + href + "/noscript");
+            try {
+                ripAlbum(albumURL, albumID);
+                Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // Restore the flag so callers see the interrupt
+                return;
+            } catch (Exception e) {
+                logger.error("Error while ripping album: " + e.getMessage(), e);
+            }
         }
     }
 
@@ -115,12 +203,12 @@ public class ImgurRipper extends AbstractRipper {
             this.url = new URL("http://imgur.com/a/" + gid);
             return gid;
         }
-        p = Pattern.compile("^https?://([a-zA-Z0-9\\-])\\.imgur\\.com/?$");
+        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.imgur\\.com/?$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             // Root imgur account
             albumType = ALBUM_TYPE.USER;
-            return m.group(m.groupCount());
+            return m.group(1);
         }
         p = Pattern.compile("^https?://([a-zA-Z0-9\\-])\\.imgur\\.com/([a-zA-Z0-9])?$");
         m = p.matcher(url.toExternalForm());
@@ -134,7 +222,7 @@ public class ImgurRipper extends AbstractRipper {
         if (m.matches()) {
             // Series of imgur images
             albumType = ALBUM_TYPE.SERIES_OF_IMAGES;
-            return m.group();
+            return m.group(m.groupCount()).replaceAll(",", "-");
         }
         throw new MalformedURLException("Unexpected URL format: " + url.toExternalForm());
     }
diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java
index 2c8e917a..623b492d 100644
--- a/src/main/java/com/rarchives/ripme/utils/Utils.java
+++ b/src/main/java/com/rarchives/ripme/utils/Utils.java
@@ -49,4 +49,27 @@ public class Utils {
         config.setProperty(key, value);
     }
 
+    /**
+     * Removes the current working directory (CWD) from a File.
+     * Only a leading CWD is stripped; paths outside the CWD are unchanged.
+     * @param saveAs
+     *      The File path
+     * @return
+     *      saveAs in relation to the CWD
+     */
+    public static String removeCWD(File saveAs) {
+        try {
+            String cwd = new File(".").getCanonicalPath() + File.separator;
+            String canonical = saveAs.getCanonicalPath();
+            if (canonical.startsWith(cwd)) { return canonical.substring(cwd.length()); }
+            return canonical;
+        } catch (Exception e) {
+            // Could not resolve canonical paths; fall back to the path as given
+            return saveAs.toString();
+        }
+    }
+
+    public static String removeCWD(String file) {
+        return removeCWD(new File(file));
+    }
 }
\ No newline at end of file