From 72b40394aeb452887a5591280c0bbde53365f6fd Mon Sep 17 00:00:00 2001
From: Wiiplay123 <TheRealWiiplay123@gmail.com>
Date: Fri, 28 Nov 2014 09:50:04 -0600
Subject: [PATCH 1/3] Added deviantART description ripping

It still needs some work; notably, the description text file doesn't get
the index prefix yet. Also fixed a resource leak in Utils.java.
---
 .../ripme/ripper/AbstractRipper.java          |  41 +++
 .../ripme/ripper/DeviantartRipper.java        | 309 ++++++++++++++++++
 .../java/com/rarchives/ripme/utils/Utils.java |   2 +
 3 files changed, 352 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/DeviantartRipper.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index e1ea4a23..71cfe86e 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -2,6 +2,7 @@ package com.rarchives.ripme.ripper;
 
 import java.awt.Desktop;
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.net.MalformedURLException;
@@ -131,6 +132,46 @@ public abstract class AbstractRipper
         }
         return addURLToDownload(url, saveFileAs, referrer, cookies);
     }
+    public boolean saveText(URL url, String subdirectory, String referrer, Map<String,String> cookies, String text) {
+        try {
+            stopCheck();
+        } catch (IOException e) {
+            return false;
+        }
+        String saveAs = url.toExternalForm();
+        saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
+        if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
+        if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
+        if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
+        if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
+        File saveFileAs;
+        try {
+            if (!subdirectory.equals("")) {
+                subdirectory = File.separator + subdirectory;
+            }
+            // TODO Get prefix working again, probably requires reworking a lot of stuff!
+            saveFileAs = new File(
+                    workingDir.getCanonicalPath()
+                    + subdirectory
+                    // + prefix
+                    + File.separator
+                    + saveAs
+                    + ".txt");
+            // Write the file
+            FileOutputStream out = (new FileOutputStream(saveFileAs));
+            out.write(text.getBytes());
+            out.close();
+        } catch (IOException e) {
+            logger.error("[!] Error creating save file path for description '" + url + "':", e);
+            return false;
+        }
+        logger.debug("Downloading " + url + "'s description to " + saveFileAs);
+        if (!saveFileAs.getParentFile().exists()) {
+            logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
+            saveFileAs.getParentFile().mkdirs();
+        }
+        return true;
+    }
     
     /**
      * Queues file to be downloaded and saved. With options.
diff --git a/src/main/java/com/rarchives/ripme/ripper/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/DeviantartRipper.java
new file mode 100644
index 00000000..15b4a064
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/DeviantartRipper.java
@@ -0,0 +1,309 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.Connection.Method;
+import org.jsoup.Connection.Response;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Base64;
+import com.rarchives.ripme.utils.Http;
+import com.rarchives.ripme.utils.Utils;
+
+public class DeviantartRipper extends AbstractHTMLRipper {
+
+    private static final int SLEEP_TIME = 2000;
+
+    private Map<String,String> cookies = new HashMap<String,String>();
+    private Set<String> triedURLs = new HashSet<String>();
+
+    public DeviantartRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "deviantart";
+    }
+    @Override
+    public String getDomain() {
+        return "deviantart.com";
+    }
+
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        String u = url.toExternalForm();
+        String subdir = "/";
+        if (u.contains("catpath=scraps")) {
+            subdir = "scraps";
+        }
+        u = u.replaceAll("\\?.*", "?catpath=" + subdir);
+        return new URL(u);
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com(/gallery)?/?(\\?.*)?$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            // Root gallery
+            if (url.toExternalForm().contains("catpath=scraps")) {
+                return m.group(1) + "_scraps";
+            }
+            else {
+                return m.group(1);
+            }
+        }
+        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            // Subgallery
+            return m.group(1) + "_" + m.group(2);
+        }
+        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/?$");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            // Subgallery
+            return m.group(1) + "_faves";
+        }
+        throw new MalformedURLException("Expected URL format: http://username.deviantart.com/[/gallery/#####], got: " + url);
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        // Login
+        try {
+            cookies = loginToDeviantart();
+        } catch (Exception e) {
+            logger.warn("Failed to login: ", e);
+        }
+        return Http.url(this.url)
+                   .cookies(cookies)
+                   .get();
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document page) {
+        List<String> imageURLs = new ArrayList<String>();
+
+        // Iterate over all thumbnails
+        for (Element thumb : page.select("div.zones-container a.thumb")) {
+            if (isStopped()) {
+                break;
+            }
+            Element img = thumb.select("img").get(0);
+            if (img.attr("transparent").equals("false")) {
+                continue; // a.thumbs to other albums are invisible
+            }
+
+            // Get full-sized image via helper methods
+            String fullSize = null;
+            String desc = null;
+            try {
+                fullSize = thumbToFull(img.attr("src"), true);
+            } catch (Exception e) {
+                logger.info("Attempting to get full size image from " + thumb.attr("href"));
+                fullSize = smallToFull(img.attr("src"), thumb.attr("href"));
+            }
+            try {
+                desc = smallToDescription(thumb.attr("href"));
+            } catch (Exception e) {
+                logger.info("Could not get description from " + thumb.attr("href"));
+            }
+            try {
+				saveText(new URL(thumb.attr("href")), "", this.url.toExternalForm(), cookies, desc);
+			} catch (MalformedURLException e) {
+				logger.info("Malformed URL while getting description from " + thumb.attr("href"));
+			}
+            if (fullSize == null) {
+                continue;
+            }
+            if (triedURLs.contains(fullSize)) {
+                logger.warn("Already tried to download " + fullSize);
+                continue;
+            }
+            triedURLs.add(fullSize);
+            imageURLs.add(fullSize);
+        }
+        return imageURLs;
+    }
+    
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        Elements nextButtons = page.select("li.next > a");
+        if (nextButtons.size() == 0) {
+            throw new IOException("No next page found");
+        }
+        Element a = nextButtons.first();
+        if (a.hasClass("disabled")) {
+            throw new IOException("Hit end of pages");
+        }
+        String nextPage = a.attr("href");
+        if (nextPage.startsWith("/")) {
+            nextPage = "http://" + this.url.getHost() + nextPage;
+        }
+        if (!sleep(SLEEP_TIME)) {
+            throw new IOException("Interrupted while waiting to load next page: " + nextPage);
+        }
+        logger.info("Found next page: " + nextPage);
+        return Http.url(nextPage)
+                   .cookies(cookies)
+                   .get();
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
+    }
+
+    /**
+     * Tries to get full size image from thumbnail URL
+     * @param thumb Thumbnail URL
+     * @param throwException Whether or not to throw exception when full size image isn't found
+     * @return Full-size image URL
+     * @throws Exception If it can't find the full-size URL
+     */
+    public static String thumbToFull(String thumb, boolean throwException) throws Exception {
+        thumb = thumb.replace("http://th", "http://fc");
+        List<String> fields = new ArrayList<String>(Arrays.asList(thumb.split("/")));
+        fields.remove(4);
+        if (!fields.get(4).equals("f") && throwException) {
+            // Not a full-size image
+            throw new Exception("Can't get full size image from " + thumb);
+        }
+        StringBuilder result = new StringBuilder();
+        for (int i = 0; i < fields.size(); i++) {
+            if (i > 0) {
+                result.append("/");
+            }
+            result.append(fields.get(i));
+        }
+        return result.toString();
+    }
+    
+    /**
+     * Attempts to download description for image.
+     * Comes in handy when people put entire stories in their description.
+     * If no description was found, returns null.
+     * @param page The page the description will be retrieved from
+     * @return The description
+     */
+    public String smallToDescription(String page) {
+        try {
+            // Fetch the image page
+            Response resp = Http.url(page)
+                                .referrer(this.url)
+                                .cookies(cookies)
+                                .response();
+            cookies.putAll(resp.cookies());
+
+            // Try to find the "Download" box
+            Elements els = resp.parse().select("div[class=dev-description]");
+            if (els.size() == 0) {
+                throw new IOException("No description found");
+            }
+            // Full-size image
+            String desc = els.text(); // TODO Figure out how to preserve newlines
+            return desc;
+        } catch (IOException ioe) {
+                logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
+                return null;
+        }
+    }
+   
+    /**
+     * If largest resolution for image at 'thumb' is found, starts downloading
+     * and returns null.
+     * If it finds a larger resolution on another page, returns the image URL.
+     * @param thumb Thumbnail URL
+     * @param page Page the thumbnail is retrieved from
+     * @return Highest-resolution version of the image based on thumbnail URL and the page.
+     */
+    public String smallToFull(String thumb, String page) {
+        try {
+            // Fetch the image page
+            Response resp = Http.url(page)
+                                .referrer(this.url)
+                                .cookies(cookies)
+                                .response();
+            cookies.putAll(resp.cookies());
+
+            // Try to find the description
+            Elements els = resp.parse().select("a.dev-page-download");
+            if (els.size() == 0) {
+                throw new IOException("No download page found");
+            }
+            // Full-size image
+            String fsimage = els.get(0).attr("href");
+            return fsimage;
+        } catch (IOException ioe) {
+            try {
+                logger.info("Failed to get full size download image at " + page + " : '" + ioe.getMessage() + "'");
+                String lessThanFull = thumbToFull(thumb, false);
+                logger.info("Falling back to less-than-full-size image " + lessThanFull);
+                return lessThanFull;
+            } catch (Exception e) {
+                return null;
+            }
+        }
+    }
+
+    /**
+     * Logs into deviant art. Required to rip full-size NSFW content.
+     * @return Map of cookies containing session data.
+     */
+    private Map<String, String> loginToDeviantart() throws IOException {
+        // Populate postData fields
+        Map<String,String> postData = new HashMap<String,String>();
+        String username = Utils.getConfigString("deviantart.username", new String(Base64.decode("Z3JhYnB5")));
+        String password = Utils.getConfigString("deviantart.password", new String(Base64.decode("ZmFrZXJz")));
+        if (username == null || password == null) {
+            throw new IOException("could not find username or password in config");
+        }
+        Response resp = Http.url("http://www.deviantart.com/")
+                            .response();
+        for (Element input : resp.parse().select("form#form-login input[type=hidden]")) {
+            postData.put(input.attr("name"), input.attr("value"));
+        }
+        postData.put("username", username);
+        postData.put("password", password);
+        postData.put("remember_me", "1");
+
+        // Send login request
+        resp = Http.url("https://www.deviantart.com/users/login")
+                    .userAgent(USER_AGENT)
+                    .data(postData)
+                    .cookies(resp.cookies())
+                    .method(Method.POST)
+                    .response();
+
+        // Assert we are logged in
+        if (resp.hasHeader("Location") && resp.header("Location").contains("password")) {
+            // Wrong password
+            throw new IOException("Wrong password");
+        }
+        if (resp.url().toExternalForm().contains("bad_form")) {
+            throw new IOException("Login form was incorrectly submitted");
+        }
+        if (resp.cookie("auth_secure") == null ||
+            resp.cookie("auth") == null) {
+            throw new IOException("No auth_secure or auth cookies received");
+        }
+        // We are logged in, save the cookies
+        return resp.cookies();
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java
index 99a4a667..c51de33e 100644
--- a/src/main/java/com/rarchives/ripme/utils/Utils.java
+++ b/src/main/java/com/rarchives/ripme/utils/Utils.java
@@ -231,10 +231,12 @@ public class Utils {
                             classes.add(Class.forName(className));
                         } catch (ClassNotFoundException e) {
                             logger.error("ClassNotFoundException loading " + className);
+                            jarFile.close(); // Resource leak fix?
                             throw new RuntimeException("ClassNotFoundException loading " + className);
                         }
                     }
                 }
+                jarFile.close(); // Eclipse said not closing it would have a resource leak
             } catch (IOException e) {
                 logger.error("Error while loading jar file:", e);
                 throw new RuntimeException(pkgname + " (" + directory + ") does not appear to be a valid package", e);

From 16e0d27f66e714f598b86e39576004ed41a6c022 Mon Sep 17 00:00:00 2001
From: Wiiplay123 <TheRealWiiplay123@gmail.com>
Date: Fri, 28 Nov 2014 22:59:39 -0600
Subject: [PATCH 2/3] Improved description ripping for deviantART

Also, added some functions to AbstractHTMLRipper that should allow
description ripping to be added to other rippers as well.
hasDescriptionSupport() should be overridden to return true by a ripper
that supports descriptions; this makes rip() run the description pass.
getDescriptionsFromPage() lists the pages to fetch descriptions from,
and getDescription() grabs the description from one such page; a ripper
must override both if it wants to save descriptions.
---
 .../ripme/ripper/AbstractHTMLRipper.java      | 75 +++++++++++++++++--
 .../ripme/ripper/AbstractRipper.java          | 42 +----------
 .../ripper/rippers/DeviantartRipper.java      | 57 +++++++++++++-
 3 files changed, 124 insertions(+), 50 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
index be73c717..6953704c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@@ -1,5 +1,7 @@
 package com.rarchives.ripme.ripper;
 
+import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -14,7 +16,7 @@ import com.rarchives.ripme.utils.Utils;
  * Simplified ripper, designed for ripping from sites by parsing HTML.
  */
 public abstract class AbstractHTMLRipper extends AlbumRipper {
-
+	
     public AbstractHTMLRipper(URL url) throws IOException {
         super(url);
     }
@@ -27,6 +29,9 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         throw new IOException("getNextPage not implemented");
     }
     public abstract List<String> getURLsFromPage(Document page);
+    public List<String> getDescriptionsFromPage(Document doc) throws IOException {
+    	throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function?
+    }
     public abstract void downloadURL(URL url, int index);
     public DownloadThreadPool getThreadPool() {
         return null;
@@ -45,21 +50,27 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
     public URL sanitizeURL(URL url) throws MalformedURLException {
         return url;
     }
-
+    public boolean hasDescriptionSupport() {
+		return false;
+    }
+    public String getDescription(String page) throws IOException {
+    	throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
+    }
     @Override
     public void rip() throws IOException {
         int index = 0;
+        int textindex = 0;
         logger.info("Retrieving " + this.url);
         sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
         Document doc = getFirstPage();
-
+        
         while (doc != null) {
             List<String> imageURLs = getURLsFromPage(doc);
 
             if (imageURLs.size() == 0) {
                 throw new IOException("No images found at " + doc.location());
             }
-
+            
             for (String imageURL : imageURLs) {
                 if (isStopped()) {
                     break;
@@ -67,6 +78,21 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
                 index += 1;
                 downloadURL(new URL(imageURL), index);
             }
+            if (hasDescriptionSupport()) {
+            	List<String> textURLs = getDescriptionsFromPage(doc);
+            	if (textURLs.size() > 0) {
+            		for (String textURL : textURLs) {
+            			if (isStopped()) {
+            				break;
+            			}
+            			textindex += 1;
+            			String tempDesc = getDescription(textURL);
+            			if (tempDesc != null) {
+            				saveText(new URL(textURL), "", tempDesc, textindex);
+            			}
+            		}
+            	}
+            }
 
             if (isStopped()) {
                 break;
@@ -87,7 +113,46 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         }
         waitForThreads();
     }
-
+    /**
+     * Writes text (e.g. an image description) as UTF-8 to
+     * workingDir[/subdirectory]/[prefix][last path segment of url].txt
+     * @param index Position passed to getPrefix() to build the file-name prefix
+     * @return true if the file was written; false if stopCheck() fails or on I/O error
+     */
+    public boolean saveText(URL url, String subdirectory, String text, int index) {
+        try {
+            stopCheck();
+        } catch (IOException e) {
+            return false;
+        }
+        String saveAs = url.toExternalForm();
+        saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
+        for (char c : new char[] {'?', '#', '&', ':'}) { // cut at each delimiter, in this order
+            if (saveAs.indexOf(c) >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(c)); }
+        }
+        File saveFileAs;
+        try {
+            if (!subdirectory.equals("")) { subdirectory = File.separator + subdirectory; }
+            saveFileAs = new File(workingDir.getCanonicalPath() + subdirectory
+                    + File.separator + getPrefix(index) + saveAs + ".txt");
+            // The target directory must exist before the stream is opened
+            if (!saveFileAs.getParentFile().exists()) {
+                logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
+                saveFileAs.getParentFile().mkdirs();
+            }
+            logger.debug("Downloading " + url + "'s description to " + saveFileAs);
+            FileOutputStream out = new FileOutputStream(saveFileAs);
+            try {
+                out.write(text.getBytes("UTF-8"));
+            } finally {
+                out.close(); // release the handle even if the write fails
+            }
+        } catch (IOException e) {
+            logger.error("[!] Error creating save file path for description '" + url + "':", e);
+            return false;
+        }
+        return true;
+    }
     public String getPrefix(int index) {
         String prefix = "";
         if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index 71cfe86e..33b5fd45 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -2,7 +2,6 @@ package com.rarchives.ripme.ripper;
 
 import java.awt.Desktop;
 import java.io.File;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.net.MalformedURLException;
@@ -132,46 +131,7 @@ public abstract class AbstractRipper
         }
         return addURLToDownload(url, saveFileAs, referrer, cookies);
     }
-    public boolean saveText(URL url, String subdirectory, String referrer, Map<String,String> cookies, String text) {
-        try {
-            stopCheck();
-        } catch (IOException e) {
-            return false;
-        }
-        String saveAs = url.toExternalForm();
-        saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
-        if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
-        if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
-        if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
-        if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
-        File saveFileAs;
-        try {
-            if (!subdirectory.equals("")) {
-                subdirectory = File.separator + subdirectory;
-            }
-            // TODO Get prefix working again, probably requires reworking a lot of stuff!
-            saveFileAs = new File(
-                    workingDir.getCanonicalPath()
-                    + subdirectory
-                    // + prefix
-                    + File.separator
-                    + saveAs
-                    + ".txt");
-            // Write the file
-            FileOutputStream out = (new FileOutputStream(saveFileAs));
-            out.write(text.getBytes());
-            out.close();
-        } catch (IOException e) {
-            logger.error("[!] Error creating save file path for description '" + url + "':", e);
-            return false;
-        }
-        logger.debug("Downloading " + url + "'s description to " + saveFileAs);
-        if (!saveFileAs.getParentFile().exists()) {
-            logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
-            saveFileAs.getParentFile().mkdirs();
-        }
-        return true;
-    }
+    
     
     /**
      * Queues file to be downloaded and saved. With options.
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
index 2c88c690..4e9c51c8 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
@@ -43,7 +43,10 @@ public class DeviantartRipper extends AbstractHTMLRipper {
     public String getDomain() {
         return "deviantart.com";
     }
-
+    @Override
+    public boolean hasDescriptionSupport() {
+		return true;
+    }
     @Override
     public URL sanitizeURL(URL url) throws MalformedURLException {
         String u = url.toExternalForm();
@@ -118,7 +121,6 @@ public class DeviantartRipper extends AbstractHTMLRipper {
                 logger.info("Attempting to get full size image from " + thumb.attr("href"));
                 fullSize = smallToFull(img.attr("src"), thumb.attr("href"));
             }
-
             if (fullSize == null) {
                 continue;
             }
@@ -131,7 +133,23 @@ public class DeviantartRipper extends AbstractHTMLRipper {
         }
         return imageURLs;
     }
-    
+    @Override
+    public List<String> getDescriptionsFromPage(Document page) {
+        List<String> textURLs = new ArrayList<String>();
+
+        // Iterate over all thumbnails
+        for (Element thumb : page.select("div.zones-container a.thumb")) {
+            if (isStopped()) {
+                break;
+            }
+            Element img = thumb.select("img").get(0);
+            if (img.attr("transparent").equals("false")) {
+                continue; // a.thumbs to other albums are invisible
+            }
+            textURLs.add(thumb.attr("href"));
+        }
+        return textURLs;
+    }
     @Override
     public Document getNextPage(Document page) throws IOException {
         Elements nextButtons = page.select("li.next > a");
@@ -184,7 +202,38 @@ public class DeviantartRipper extends AbstractHTMLRipper {
         }
         return result.toString();
     }
+    
+    /**
+     * Attempts to download description for image.
+     * Comes in handy when people put entire stories in their description.
+     * If no description was found, returns null.
+     * @param page The page the description will be retrieved from
+     * @return The description
+     */
+    @Override
+    public String getDescription(String page) {
+        try {
+            // Fetch the image page
+            Response resp = Http.url(page)
+                                .referrer(this.url)
+                                .cookies(cookies)
+                                .response();
+            cookies.putAll(resp.cookies());
 
+            // Try to find the "Download" box
+            Elements els = resp.parse().select("div[class=dev-description]");
+            if (els.size() == 0) {
+                throw new IOException("No description found");
+            }
+            // Full-size image
+            String desc = els.text(); // TODO Figure out how to preserve newlines
+            return desc;
+        } catch (IOException ioe) {
+                logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
+                return null;
+        }
+    }
+   
     /**
      * If largest resolution for image at 'thumb' is found, starts downloading
      * and returns null.
@@ -202,7 +251,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
                                 .response();
             cookies.putAll(resp.cookies());
 
-            // Try to find the "Download" box
+            // Try to find the description
             Elements els = resp.parse().select("a.dev-page-download");
             if (els.size() == 0) {
                 throw new IOException("No download page found");

From be89649466d4e77f44134b9ef6b4d5e07f68dd45 Mon Sep 17 00:00:00 2001
From: Wiiplay123 <TheRealWiiplay123@gmail.com>
Date: Sat, 29 Nov 2014 23:14:57 -0600
Subject: [PATCH 3/3] Added line breaks to deviantART ripper

Finally added the support for newlines that I've been going on about in
the other commits. Also got rid of a comment that shouldn't have been
there, as well as fixing the fact that I just broke everything two
commits back and didn't notice until just now. Sorry about that!
---
 .../ripme/ripper/DeviantartRipper.java        | 309 ------------------
 .../ripper/rippers/DeviantartRipper.java      |  16 +-
 2 files changed, 11 insertions(+), 314 deletions(-)
 delete mode 100644 src/main/java/com/rarchives/ripme/ripper/DeviantartRipper.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/DeviantartRipper.java
deleted file mode 100644
index 15b4a064..00000000
--- a/src/main/java/com/rarchives/ripme/ripper/DeviantartRipper.java
+++ /dev/null
@@ -1,309 +0,0 @@
-package com.rarchives.ripme.ripper.rippers;
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.Connection.Method;
-import org.jsoup.Connection.Response;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.rarchives.ripme.ripper.AbstractHTMLRipper;
-import com.rarchives.ripme.utils.Base64;
-import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.Utils;
-
-public class DeviantartRipper extends AbstractHTMLRipper {
-
-    private static final int SLEEP_TIME = 2000;
-
-    private Map<String,String> cookies = new HashMap<String,String>();
-    private Set<String> triedURLs = new HashSet<String>();
-
-    public DeviantartRipper(URL url) throws IOException {
-        super(url);
-    }
-
-    @Override
-    public String getHost() {
-        return "deviantart";
-    }
-    @Override
-    public String getDomain() {
-        return "deviantart.com";
-    }
-
-    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        String u = url.toExternalForm();
-        String subdir = "/";
-        if (u.contains("catpath=scraps")) {
-            subdir = "scraps";
-        }
-        u = u.replaceAll("\\?.*", "?catpath=" + subdir);
-        return new URL(u);
-    }
-
-    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com(/gallery)?/?(\\?.*)?$");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            // Root gallery
-            if (url.toExternalForm().contains("catpath=scraps")) {
-                return m.group(1) + "_scraps";
-            }
-            else {
-                return m.group(1);
-            }
-        }
-        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$");
-        m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            // Subgallery
-            return m.group(1) + "_" + m.group(2);
-        }
-        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/favou?rites/?$");
-        m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            // Subgallery
-            return m.group(1) + "_faves";
-        }
-        throw new MalformedURLException("Expected URL format: http://username.deviantart.com/[/gallery/#####], got: " + url);
-    }
-
-    @Override
-    public Document getFirstPage() throws IOException {
-        // Login
-        try {
-            cookies = loginToDeviantart();
-        } catch (Exception e) {
-            logger.warn("Failed to login: ", e);
-        }
-        return Http.url(this.url)
-                   .cookies(cookies)
-                   .get();
-    }
-
-    @Override
-    public List<String> getURLsFromPage(Document page) {
-        List<String> imageURLs = new ArrayList<String>();
-
-        // Iterate over all thumbnails
-        for (Element thumb : page.select("div.zones-container a.thumb")) {
-            if (isStopped()) {
-                break;
-            }
-            Element img = thumb.select("img").get(0);
-            if (img.attr("transparent").equals("false")) {
-                continue; // a.thumbs to other albums are invisible
-            }
-
-            // Get full-sized image via helper methods
-            String fullSize = null;
-            String desc = null;
-            try {
-                fullSize = thumbToFull(img.attr("src"), true);
-            } catch (Exception e) {
-                logger.info("Attempting to get full size image from " + thumb.attr("href"));
-                fullSize = smallToFull(img.attr("src"), thumb.attr("href"));
-            }
-            try {
-                desc = smallToDescription(thumb.attr("href"));
-            } catch (Exception e) {
-                logger.info("Could not get description from " + thumb.attr("href"));
-            }
-            try {
-				saveText(new URL(thumb.attr("href")), "", this.url.toExternalForm(), cookies, desc);
-			} catch (MalformedURLException e) {
-				logger.info("Malformed URL while getting description from " + thumb.attr("href"));
-			}
-            if (fullSize == null) {
-                continue;
-            }
-            if (triedURLs.contains(fullSize)) {
-                logger.warn("Already tried to download " + fullSize);
-                continue;
-            }
-            triedURLs.add(fullSize);
-            imageURLs.add(fullSize);
-        }
-        return imageURLs;
-    }
-    
-    @Override
-    public Document getNextPage(Document page) throws IOException {
-        Elements nextButtons = page.select("li.next > a");
-        if (nextButtons.size() == 0) {
-            throw new IOException("No next page found");
-        }
-        Element a = nextButtons.first();
-        if (a.hasClass("disabled")) {
-            throw new IOException("Hit end of pages");
-        }
-        String nextPage = a.attr("href");
-        if (nextPage.startsWith("/")) {
-            nextPage = "http://" + this.url.getHost() + nextPage;
-        }
-        if (!sleep(SLEEP_TIME)) {
-            throw new IOException("Interrupted while waiting to load next page: " + nextPage);
-        }
-        logger.info("Found next page: " + nextPage);
-        return Http.url(nextPage)
-                   .cookies(cookies)
-                   .get();
-    }
-
-    @Override
-    public void downloadURL(URL url, int index) {
-        addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies);
-    }
-
-    /**
-     * Tries to get full size image from thumbnail URL
-     * @param thumb Thumbnail URL
-     * @param throwException Whether or not to throw exception when full size image isn't found
-     * @return Full-size image URL
-     * @throws Exception If it can't find the full-size URL
-     */
-    public static String thumbToFull(String thumb, boolean throwException) throws Exception {
-        thumb = thumb.replace("http://th", "http://fc");
-        List<String> fields = new ArrayList<String>(Arrays.asList(thumb.split("/")));
-        fields.remove(4);
-        if (!fields.get(4).equals("f") && throwException) {
-            // Not a full-size image
-            throw new Exception("Can't get full size image from " + thumb);
-        }
-        StringBuilder result = new StringBuilder();
-        for (int i = 0; i < fields.size(); i++) {
-            if (i > 0) {
-                result.append("/");
-            }
-            result.append(fields.get(i));
-        }
-        return result.toString();
-    }
-    
-    /**
-     * Attempts to download description for image.
-     * Comes in handy when people put entire stories in their description.
-     * If no description was found, returns null.
-     * @param page The page the description will be retrieved from
-     * @return The description
-     */
-    public String smallToDescription(String page) {
-        try {
-            // Fetch the image page
-            Response resp = Http.url(page)
-                                .referrer(this.url)
-                                .cookies(cookies)
-                                .response();
-            cookies.putAll(resp.cookies());
-
-            // Try to find the "Download" box
-            Elements els = resp.parse().select("div[class=dev-description]");
-            if (els.size() == 0) {
-                throw new IOException("No description found");
-            }
-            // Full-size image
-            String desc = els.text(); // TODO Figure out how to preserve newlines
-            return desc;
-        } catch (IOException ioe) {
-                logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
-                return null;
-        }
-    }
-   
-    /**
-     * If largest resolution for image at 'thumb' is found, starts downloading
-     * and returns null.
-     * If it finds a larger resolution on another page, returns the image URL.
-     * @param thumb Thumbnail URL
-     * @param page Page the thumbnail is retrieved from
-     * @return Highest-resolution version of the image based on thumbnail URL and the page.
-     */
-    public String smallToFull(String thumb, String page) {
-        try {
-            // Fetch the image page
-            Response resp = Http.url(page)
-                                .referrer(this.url)
-                                .cookies(cookies)
-                                .response();
-            cookies.putAll(resp.cookies());
-
-            // Try to find the description
-            Elements els = resp.parse().select("a.dev-page-download");
-            if (els.size() == 0) {
-                throw new IOException("No download page found");
-            }
-            // Full-size image
-            String fsimage = els.get(0).attr("href");
-            return fsimage;
-        } catch (IOException ioe) {
-            try {
-                logger.info("Failed to get full size download image at " + page + " : '" + ioe.getMessage() + "'");
-                String lessThanFull = thumbToFull(thumb, false);
-                logger.info("Falling back to less-than-full-size image " + lessThanFull);
-                return lessThanFull;
-            } catch (Exception e) {
-                return null;
-            }
-        }
-    }
-
-    /**
-     * Logs into deviant art. Required to rip full-size NSFW content.
-     * @return Map of cookies containing session data.
-     */
-    private Map<String, String> loginToDeviantart() throws IOException {
-        // Populate postData fields
-        Map<String,String> postData = new HashMap<String,String>();
-        String username = Utils.getConfigString("deviantart.username", new String(Base64.decode("Z3JhYnB5")));
-        String password = Utils.getConfigString("deviantart.password", new String(Base64.decode("ZmFrZXJz")));
-        if (username == null || password == null) {
-            throw new IOException("could not find username or password in config");
-        }
-        Response resp = Http.url("http://www.deviantart.com/")
-                            .response();
-        for (Element input : resp.parse().select("form#form-login input[type=hidden]")) {
-            postData.put(input.attr("name"), input.attr("value"));
-        }
-        postData.put("username", username);
-        postData.put("password", password);
-        postData.put("remember_me", "1");
-
-        // Send login request
-        resp = Http.url("https://www.deviantart.com/users/login")
-                    .userAgent(USER_AGENT)
-                    .data(postData)
-                    .cookies(resp.cookies())
-                    .method(Method.POST)
-                    .response();
-
-        // Assert we are logged in
-        if (resp.hasHeader("Location") && resp.header("Location").contains("password")) {
-            // Wrong password
-            throw new IOException("Wrong password");
-        }
-        if (resp.url().toExternalForm().contains("bad_form")) {
-            throw new IOException("Login form was incorrectly submitted");
-        }
-        if (resp.cookie("auth_secure") == null ||
-            resp.cookie("auth") == null) {
-            throw new IOException("No auth_secure or auth cookies received");
-        }
-        // We are logged in, save the cookies
-        return resp.cookies();
-    }
-}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
index 4e9c51c8..f7f1ed93 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
@@ -15,8 +15,10 @@ import java.util.regex.Pattern;
 
 import org.jsoup.Connection.Method;
 import org.jsoup.Connection.Response;
+import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.safety.Whitelist;
 import org.jsoup.select.Elements;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@@ -220,14 +222,18 @@ public class DeviantartRipper extends AbstractHTMLRipper {
                                 .response();
             cookies.putAll(resp.cookies());
 
-            // Try to find the "Download" box
+            // Try to find the description
             Elements els = resp.parse().select("div[class=dev-description]");
             if (els.size() == 0) {
                 throw new IOException("No description found");
             }
-            // Full-size image
-            String desc = els.text(); // TODO Figure out how to preserve newlines
-            return desc;
+            Document documentz = resp.parse();
+            Element ele = documentz.select("div[class=dev-description]").get(0);
+            documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
+            ele.select("br").append("\\n");
+            ele.select("p").prepend("\\n\\n");
+            return Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
+            // TODO: Do not convert a literal "\n" that the author typed into the description into a line break.
         } catch (IOException ioe) {
                 logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
                 return null;
@@ -251,7 +257,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
                                 .response();
             cookies.putAll(resp.cookies());
 
-            // Try to find the description
+            // Try to find the download button
             Elements els = resp.parse().select("a.dev-page-download");
             if (els.size() == 0) {
                 throw new IOException("No download page found");