Merge pull request #144 from Wiiplay123/master

Added deviantART description ripping
2015-01-10 15:31:46 -08:00 · 2015-01-10 15:31:46 -08:00 · 27bda1bc9f
commit 27bda1bc9f
parent 9770dfed32 be89649466
4 changed files with 132 additions and 9 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@ -1,5 +1,7 @@
 package com.rarchives.ripme.ripper;

+import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
@ -14,7 +16,7 @@ import com.rarchives.ripme.utils.Utils;
 * Simplified ripper, designed for ripping from sites by parsing HTML.
 */
 public abstract class AbstractHTMLRipper extends AlbumRipper {
-
+	
    public AbstractHTMLRipper(URL url) throws IOException {
        super(url);
    }
@ -27,6 +29,9 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
        throw new IOException("getNextPage not implemented");
    }
    public abstract List<String> getURLsFromPage(Document page);
+    /**
+     * Lists the pages on the given document whose descriptions should be saved.
+     * rip() only calls this when hasDescriptionSupport() returns true, and hands
+     * every returned string to getDescription() and to the URL constructor.
+     * This default implementation always fails; rippers that support
+     * descriptions override it.
+     * @param doc The album page currently being ripped
+     * @return URLs (as strings) of the pages that hold a description
+     * @throws IOException Always, in this default implementation
+     */
+    public List<String> getDescriptionsFromPage(Document doc) throws IOException {
+        final String message = "getDescriptionsFromPage not implemented";
+        throw new IOException(message);
+    }
    public abstract void downloadURL(URL url, int index);
    public DownloadThreadPool getThreadPool() {
        return null;
@ -45,21 +50,27 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
    public URL sanitizeURL(URL url) throws MalformedURLException {
        return url;
    }
-
+    /**
+     * Tells rip() whether descriptions should be fetched and saved in addition
+     * to the images.
+     * @return false by default; rippers that can fetch descriptions override
+     *         this to return true
+     */
+    public boolean hasDescriptionSupport() {
+        return false;
+    }
+    /**
+     * Retrieves the description found at the given page.
+     * rip() calls this once for every string returned by
+     * getDescriptionsFromPage() and skips saving when the result is null.
+     * This default implementation always fails; rippers that support
+     * descriptions override it.
+     * @param page URL (as a string) of the page holding the description
+     * @return The description text
+     * @throws IOException Always, in this default implementation
+     */
+    public String getDescription(String page) throws IOException {
+        final String message = "getDescription not implemented";
+        throw new IOException(message);
+    }
    @Override
    public void rip() throws IOException {
        int index = 0;
+        int textindex = 0;
        logger.info("Retrieving " + this.url);
        sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
        Document doc = getFirstPage();
-
+        
        while (doc != null) {
            List<String> imageURLs = getURLsFromPage(doc);

            if (imageURLs.size() == 0) {
                throw new IOException("No images found at " + doc.location());
            }
-
+            
            for (String imageURL : imageURLs) {
                if (isStopped()) {
                    break;
@ -67,6 +78,21 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
                index += 1;
                downloadURL(new URL(imageURL), index);
            }
+            if (hasDescriptionSupport()) {
+            	List<String> textURLs = getDescriptionsFromPage(doc);
+            	if (textURLs.size() > 0) {
+            		for (String textURL : textURLs) {
+            			if (isStopped()) {
+            				break;
+            			}
+            			textindex += 1;
+            			String tempDesc = getDescription(textURL);
+            			if (tempDesc != null) {
+            				saveText(new URL(textURL), "", tempDesc, textindex);
+            			}
+            		}
+            	}
+            }

            if (isStopped()) {
                break;
@ -87,7 +113,46 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
        }
        waitForThreads();
    }
-
+    /**
+     * Writes a description to a ".txt" file below the working directory.
+     * The file name is the part of the URL after its last '/', cut at the first
+     * '?', '#', '&' or ':', with getPrefix(index) in front of it.
+     * @param url Page the description came from; used for the file name and for logging
+     * @param subdirectory Directory below the working directory, or "" (or null) for none
+     * @param text The description to write; encoded as UTF-8
+     * @param index Passed to getPrefix() to build the file name prefix
+     * @return true if the file was written; false if stopCheck() failed, text
+     *         is null, or the path could not be built or written
+     */
+    public boolean saveText(URL url, String subdirectory, String text, int index) {
+        try {
+            stopCheck();
+        } catch (IOException e) {
+            return false;
+        }
+        if (text == null) {
+            return false; // Nothing to write
+        }
+        String saveAs = url.toExternalForm();
+        saveAs = saveAs.substring(saveAs.lastIndexOf('/') + 1);
+        // Cut the name at the first character that starts a query/fragment/etc.
+        for (char delimiter : new char[] {'?', '#', '&', ':'}) {
+            int cut = saveAs.indexOf(delimiter);
+            if (cut >= 0) {
+                saveAs = saveAs.substring(0, cut);
+            }
+        }
+        String subdirPath = "";
+        if (subdirectory != null && !subdirectory.equals("")) {
+            subdirPath = File.separator + subdirectory;
+        }
+        File saveFileAs;
+        try {
+            // TODO Get prefix working again, probably requires reworking a lot of stuff!
+            saveFileAs = new File(
+                    workingDir.getCanonicalPath()
+                    + subdirPath
+                    + File.separator
+                    + getPrefix(index)
+                    + saveAs
+                    + ".txt");
+        } catch (IOException e) {
+            logger.error("[!] Error creating save file path for description '" + url + "':", e);
+            return false;
+        }
+        // The directory has to exist before the file can be opened for writing
+        File parentDir = saveFileAs.getParentFile();
+        if (!parentDir.exists()) {
+            logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
+            parentDir.mkdirs();
+        }
+        logger.debug("Downloading " + url + "'s description to " + saveFileAs);
+        try {
+            FileOutputStream out = new FileOutputStream(saveFileAs);
+            try {
+                // Explicit charset so the output does not depend on the platform default
+                out.write(text.getBytes("UTF-8"));
+            } finally {
+                out.close(); // Also runs when write() fails, so the handle is not leaked
+            }
+        } catch (IOException e) {
+            logger.error("[!] Error writing description for '" + url + "' to " + saveFileAs + ":", e);
+            return false;
+        }
+        return true;
+    }
    public String getPrefix(int index) {
        String prefix = "";
        if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@ -132,6 +132,7 @@ public abstract class AbstractRipper
        return addURLToDownload(url, saveFileAs, referrer, cookies);
    }
    
+    
    /**
     * Queues file to be downloaded and saved. With options.
     * @param url
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
@ -15,8 +15,10 @@ import java.util.regex.Pattern;

 import org.jsoup.Connection.Method;
 import org.jsoup.Connection.Response;
+import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.safety.Whitelist;
 import org.jsoup.select.Elements;

 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
@ -43,7 +45,10 @@ public class DeviantartRipper extends AbstractHTMLRipper {
    public String getDomain() {
        return "deviantart.com";
    }
-
+    /**
+     * Turns on description ripping for this ripper.
+     * @return Always true
+     */
+    @Override
+    public boolean hasDescriptionSupport() {
+        return true;
+    }
    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        String u = url.toExternalForm();
@ -118,7 +123,6 @@ public class DeviantartRipper extends AbstractHTMLRipper {
                logger.info("Attempting to get full size image from " + thumb.attr("href"));
                fullSize = smallToFull(img.attr("src"), thumb.attr("href"));
            }
-
            if (fullSize == null) {
                continue;
            }
@ -131,7 +135,23 @@ public class DeviantartRipper extends AbstractHTMLRipper {
        }
        return imageURLs;
    }
-    
+    /**
+     * Collects the links of all thumbnails on a gallery page, so that the
+     * description of each linked page can be fetched.
+     * Thumbnails without an image, thumbnails whose image has
+     * transparent="false", and thumbnails without a link are left out.
+     * @param page The gallery page
+     * @return The href of every remaining thumbnail; empty if there are none
+     */
+    @Override
+    public List<String> getDescriptionsFromPage(Document page) {
+        List<String> textURLs = new ArrayList<String>();
+
+        // Iterate over all thumbnails
+        for (Element thumb : page.select("div.zones-container a.thumb")) {
+            if (isStopped()) {
+                break;
+            }
+            // first() returns null instead of throwing when the thumb holds no <img>
+            Element img = thumb.select("img").first();
+            if (img == null) {
+                continue;
+            }
+            if ("false".equals(img.attr("transparent"))) {
+                continue; // a.thumbs to other albums are invisible
+            }
+            String href = thumb.attr("href");
+            if (href.length() == 0) {
+                continue; // rip() turns each entry into a URL; an empty one would abort the rip
+            }
+            textURLs.add(href);
+        }
+        return textURLs;
+    }
    @Override
    public Document getNextPage(Document page) throws IOException {
        Elements nextButtons = page.select("li.next > a");
@ -184,7 +204,42 @@ public class DeviantartRipper extends AbstractHTMLRipper {
        }
        return result.toString();
    }
+    
+    /**
+     * Attempts to download description for image.
+     * Comes in handy when people put entire stories in their description.
+     * Line breaks (br) and paragraphs (p) are turned into line separators and
+     * all remaining markup is stripped.
+     * If no description was found or the page could not be fetched, returns null.
+     * @param page The page the description will be retrieved from
+     * @return The description as plain text, or null
+     */
+    @Override
+    public String getDescription(String page) {
+        // Stand-in for a line break while the markup is still HTML. Plain ASCII
+        // so it survives HTML escaping, and unlikely to be typed by a user
+        // (unlike a literal backslash-n, which used to be turned into a newline).
+        final String lineBreak = "%%RIPME_LINE_BREAK%%";
+        try {
+            // Fetch the image page
+            Response resp = Http.url(page)
+                                .referrer(this.url)
+                                .cookies(cookies)
+                                .response();
+            cookies.putAll(resp.cookies());
+
+            // Parse the page once and try to find the description
+            Document doc = resp.parse();
+            Element description = doc.select("div[class=dev-description]").first();
+            if (description == null) {
+                logger.info("Failed to get description " + page + " : 'No description found'");
+                return null;
+            }
+            // Keep html() from re-indenting, which would add whitespace of its own
+            doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
+            description.select("br").append(lineBreak);
+            description.select("p").prepend(lineBreak + lineBreak);
+            // Literal replace: no regex involved, so nothing in the text needs escaping
+            String html = description.html().replace(lineBreak, System.getProperty("line.separator"));
+            return Jsoup.clean(html, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
+        } catch (IOException ioe) {
+            logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
+            return null;
+        }
+    }
+   
    /**
     * If largest resolution for image at 'thumb' is found, starts downloading
     * and returns null.
@ -202,7 +257,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
                                .response();
            cookies.putAll(resp.cookies());

-            // Try to find the "Download" box
+            // Try to find the download button
            Elements els = resp.parse().select("a.dev-page-download");
            if (els.size() == 0) {
                throw new IOException("No download page found");
--- a/src/main/java/com/rarchives/ripme/utils/Utils.java
+++ b/src/main/java/com/rarchives/ripme/utils/Utils.java
@ -231,10 +231,12 @@ public class Utils {
                            classes.add(Class.forName(className));
                        } catch (ClassNotFoundException e) {
                            logger.error("ClassNotFoundException loading " + className);
+                            jarFile.close(); // Resource leak fix?
                            throw new RuntimeException("ClassNotFoundException loading " + className);
                        }
                    }
                }
+                jarFile.close(); // Eclipse said not closing it would have a resource leak
            } catch (IOException e) {
                logger.error("Error while loading jar file:", e);
                throw new RuntimeException(pkgname + " (" + directory + ") does not appear to be a valid package", e);