diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
index be73c717..6953704c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@@ -1,5 +1,7 @@
 package com.rarchives.ripme.ripper;
 
+import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -14,7 +16,7 @@ import com.rarchives.ripme.utils.Utils;
  * Simplified ripper, designed for ripping from sites by parsing HTML.
  */
 public abstract class AbstractHTMLRipper extends AlbumRipper {
-    
+
     public AbstractHTMLRipper(URL url) throws IOException {
         super(url);
     }
@@ -27,6 +29,9 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         throw new IOException("getNextPage not implemented");
     }
     public abstract List<String> getURLsFromPage(Document page);
+    public List<String> getDescriptionsFromPage(Document doc) throws IOException {
+        throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function?
+    }
     public abstract void downloadURL(URL url, int index);
     public DownloadThreadPool getThreadPool() {
         return null;
@@ -45,21 +50,27 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
     public URL sanitizeURL(URL url) throws MalformedURLException {
         return url;
     }
-    
+    public boolean hasDescriptionSupport() {
+        return false;
+    }
+    public String getDescription(String page) throws IOException {
+        throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
+    }
     @Override
     public void rip() throws IOException {
         int index = 0;
+        int textindex = 0;
         logger.info("Retrieving " + this.url);
         sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
         Document doc = getFirstPage();
-        
+
         while (doc != null) {
             List<String> imageURLs = getURLsFromPage(doc);
 
             if (imageURLs.size() == 0) {
                 throw new IOException("No images found at " + doc.location());
             }
-            
+
             for (String imageURL : imageURLs) {
                 if (isStopped()) {
                     break;
@@ -67,6 +78,21 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
                 index += 1;
                 downloadURL(new URL(imageURL), index);
             }
+            if (hasDescriptionSupport()) {
+                List<String> textURLs = getDescriptionsFromPage(doc);
+                if (textURLs.size() > 0) {
+                    for (String textURL : textURLs) {
+                        if (isStopped()) {
+                            break;
+                        }
+                        textindex += 1;
+                        String tempDesc = getDescription(textURL);
+                        if (tempDesc != null) {
+                            saveText(new URL(textURL), "", tempDesc, textindex);
+                        }
+                    }
+                }
+            }
 
             if (isStopped()) {
                 break;
@@ -87,7 +113,67 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
         }
         waitForThreads();
     }
-    
+    /**
+     * Saves a description as a ".txt" file inside the working directory.
+     * The file name is the last path segment of the URL, cut at the first
+     * '?', '#', '&' or ':' and prefixed with getPrefix(index).
+     * @param url
+     *      URL the description was taken from; used to build the file name.
+     * @param subdirectory
+     *      Directory below the working directory to save into, "" for none.
+     * @param text
+     *      The description to write.
+     * @param index
+     *      Number of the description, passed to getPrefix.
+     * @return
+     *      True if the file was written; false if stopCheck() failed or the
+     *      file could not be created or written.
+     */
+    public boolean saveText(URL url, String subdirectory, String text, int index) {
+        try {
+            stopCheck();
+        } catch (IOException e) {
+            return false;
+        }
+        String saveAs = url.toExternalForm();
+        saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
+        if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
+        if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
+        if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
+        if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
+        File saveFileAs;
+        try {
+            if (!subdirectory.equals("")) { // Only add a separator when a subdirectory was given
+                subdirectory = File.separator + subdirectory;
+            }
+            // TODO Get prefix working again, probably requires reworking a lot of stuff!
+            saveFileAs = new File(
+                    workingDir.getCanonicalPath()
+                    + subdirectory
+                    + File.separator
+                    + getPrefix(index)
+                    + saveAs
+                    + ".txt");
+            // The directory has to exist before the stream is opened,
+            // otherwise FileOutputStream fails with FileNotFoundException.
+            if (!saveFileAs.getParentFile().exists()) {
+                logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
+                saveFileAs.getParentFile().mkdirs();
+            }
+            logger.debug("Downloading " + url + "'s description to " + saveFileAs);
+            // Write the file; close the stream even if the write fails
+            FileOutputStream out = new FileOutputStream(saveFileAs);
+            try {
+                out.write(text.getBytes("UTF-8"));
+            } finally {
+                out.close();
+            }
+        } catch (IOException e) {
+            logger.error("[!] Error creating save file path for description '" + url + "':", e);
+            return false;
+        }
+        return true;
+    }
     public String getPrefix(int index) {
         String prefix = "";
         if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index e1ea4a23..33b5fd45 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -132,6 +132,7 @@ public abstract class AbstractRipper
         return addURLToDownload(url, saveFileAs, referrer, cookies);
     }
 
+
     /**
      * Queues file to be downloaded and saved. With options.
* @param url diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 2c88c690..f7f1ed93 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -15,8 +15,10 @@ import java.util.regex.Pattern; import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; +import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.safety.Whitelist; import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AbstractHTMLRipper; @@ -43,7 +45,10 @@ public class DeviantartRipper extends AbstractHTMLRipper { public String getDomain() { return "deviantart.com"; } - + @Override + public boolean hasDescriptionSupport() { + return true; + } @Override public URL sanitizeURL(URL url) throws MalformedURLException { String u = url.toExternalForm(); @@ -118,7 +123,6 @@ public class DeviantartRipper extends AbstractHTMLRipper { logger.info("Attempting to get full size image from " + thumb.attr("href")); fullSize = smallToFull(img.attr("src"), thumb.attr("href")); } - if (fullSize == null) { continue; } @@ -131,7 +135,23 @@ public class DeviantartRipper extends AbstractHTMLRipper { } return imageURLs; } - + @Override + public List getDescriptionsFromPage(Document page) { + List textURLs = new ArrayList(); + + // Iterate over all thumbnails + for (Element thumb : page.select("div.zones-container a.thumb")) { + if (isStopped()) { + break; + } + Element img = thumb.select("img").get(0); + if (img.attr("transparent").equals("false")) { + continue; // a.thumbs to other albums are invisible + } + textURLs.add(thumb.attr("href")); + } + return textURLs; + } @Override public Document getNextPage(Document page) throws IOException { Elements nextButtons = page.select("li.next > a"); @@ -184,7 +204,42 @@ public class 
DeviantartRipper extends AbstractHTMLRipper { } return result.toString(); } + + /** + * Attempts to download description for image. + * Comes in handy when people put entire stories in their description. + * If no description was found, returns null. + * @param page The page the description will be retrieved from + * @return The description + */ + @Override + public String getDescription(String page) { + try { + // Fetch the image page + Response resp = Http.url(page) + .referrer(this.url) + .cookies(cookies) + .response(); + cookies.putAll(resp.cookies()); + // Try to find the description + Elements els = resp.parse().select("div[class=dev-description]"); + if (els.size() == 0) { + throw new IOException("No description found"); + } + Document documentz = resp.parse(); + Element ele = documentz.select("div[class=dev-description]").get(0); + documentz.outputSettings(new Document.OutputSettings().prettyPrint(false)); + ele.select("br").append("\\n"); + ele.select("p").prepend("\\n\\n"); + return Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); + // TODO Make this not make a newline if someone just types \n into the description. + } catch (IOException ioe) { + logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'"); + return null; + } + } + /** * If largest resolution for image at 'thumb' is found, starts downloading * and returns null. 
@@ -202,7 +257,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { .response(); cookies.putAll(resp.cookies()); - // Try to find the "Download" box + // Try to find the download button Elements els = resp.parse().select("a.dev-page-download"); if (els.size() == 0) { throw new IOException("No download page found"); diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java index 99a4a667..c51de33e 100644 --- a/src/main/java/com/rarchives/ripme/utils/Utils.java +++ b/src/main/java/com/rarchives/ripme/utils/Utils.java @@ -231,10 +231,12 @@ public class Utils { classes.add(Class.forName(className)); } catch (ClassNotFoundException e) { logger.error("ClassNotFoundException loading " + className); + jarFile.close(); // Resource leak fix? throw new RuntimeException("ClassNotFoundException loading " + className); } } } + jarFile.close(); // Eclipse said not closing it would have a resource leak } catch (IOException e) { logger.error("Error while loading jar file:", e); throw new RuntimeException(pkgname + " (" + directory + ") does not appear to be a valid package", e);