From f2afb840ddd56281782f56f9cd1e21a0ff2e1a7a Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Wed, 26 Apr 2017 23:13:11 -0500 Subject: [PATCH] Fixed deviantArt description file name issue. The deviantArt description file name will now match the image file name, rather than the URL. --- .../ripme/ripper/AbstractHTMLRipper.java | 26 +++++---- .../ripper/rippers/DeviantartRipper.java | 54 ++++++++++++------- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 35b2aa43..6111db79 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -53,7 +53,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { public boolean hasDescriptionSupport() { return false; } - public String getDescription(String page) throws IOException { + public String[] getDescription(String url,Document page) throws IOException { throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? } public int descSleepTime() { @@ -95,15 +95,16 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { logger.debug("Found description link(s) from " + doc.location()); for (String textURL : textURLs) { if (isStopped()) { + break; } textindex += 1; logger.debug("Getting description from " + textURL); sleep(descSleepTime()); - String tempDesc = getDescription(textURL); + String[] tempDesc = getDescription(textURL,doc); if (tempDesc != null) { - logger.debug("Got description: " + tempDesc); - saveText(new URL(textURL), "", tempDesc, textindex); + logger.debug("Got description from " + textURL); + saveText(new URL(textURL), "", tempDesc[0], textindex,tempDesc[1]); } } } @@ -130,18 +131,21 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { waitForThreads(); } public boolean saveText(URL url, String subdirectory, String text, int index) { - // Not the best for some cases, like FurAffinity. Overridden there. - try { - stopCheck(); - } catch (IOException e) { - return false; - } String saveAs = url.toExternalForm(); saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); } + return saveText(url,subdirectory,text,index,saveAs); + } + public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) { + // Not the best for some cases, like FurAffinity. Overridden there. + try { + stopCheck(); + } catch (IOException e) { + return false; + } File saveFileAs; try { if (!subdirectory.equals("")) { // Not sure about this part @@ -153,7 +157,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { + subdirectory + File.separator + getPrefix(index) - + saveAs + + fileName + ".txt"); // Write the file FileOutputStream out = (new FileOutputStream(saveFileAs)); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 994efa7a..b717437f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -119,7 +119,19 @@ public class DeviantartRipper extends AbstractHTMLRipper { .cookies(cookies) .get(); } - + public String jsonToImage(Document page,String id) { + Elements js = page.select("script[type=\"text/javascript\"]"); + for (Element tag : js) { + if (tag.html().contains("window.__pageload")) { + String script = tag.html(); + script = script.substring(script.indexOf("window.__pageload")); + script = script.substring(script.indexOf(id)); + script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id + return script.replace("\\/","/"); + } + } + return null; + } @Override public List getURLsFromPage(Document page) { List imageURLs = new ArrayList(); @@ -139,18 +151,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { fullSize = thumb.attr("data-super-full-img"); } else { String spanUrl = thumb.attr("href"); - // id = spanUrl.substring(spanUrl.lastIndexOf('-') + 1) - Elements js = page.select("script[type=\"text/javascript\"]"); - for (Element tag : js) { - if (tag.html().contains("window.__pageload")) { - String script = tag.html(); - script = script.substring(script.indexOf("window.__pageload")); - script = script.substring(script.indexOf(spanUrl.substring(spanUrl.lastIndexOf('-') + 1))); - script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id - fullSize = script.replace("\\/","/"); - break; - } - } + fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); if (fullSize == null) { try { fullSize = thumbToFull(img.attr("src"), true); @@ -263,17 +264,18 @@ public class DeviantartRipper extends AbstractHTMLRipper { * Attempts to download description for image. * Comes in handy when people put entire stories in their description. * If no description was found, returns null. - * @param page The page the description will be retrieved from - * @return The description + * @param url The URL the description will be retrieved from + * @param page The gallery page the URL was found on + * @return A String[] with first object being the description, and the second object being image file name if found. */ @Override - public String getDescription(String page) { + public String[] getDescription(String url,Document page) { if (isThisATest()) { return null; } try { // Fetch the image page - Response resp = Http.url(page) + Response resp = Http.url(url) .referrer(this.url) .cookies(cookies) .response(); @@ -289,7 +291,23 @@ public class DeviantartRipper extends AbstractHTMLRipper { documentz.outputSettings(new Document.OutputSettings().prettyPrint(false)); ele.select("br").append("\\n"); ele.select("p").prepend("\\n\\n"); - return Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); + String fullSize = null; + Element thumb = page.select("div.zones-container span.thumb[href=\"" + url + "\"]").get(0); + if (!thumb.attr("data-super-full-img").isEmpty()) { + fullSize = thumb.attr("data-super-full-img"); + String[] split = fullSize.split("/"); + fullSize = split[split.length - 1]; + } else { + String spanUrl = thumb.attr("href"); + fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); + String[] split = fullSize.split("/"); + fullSize = split[split.length - 1]; + } + if (fullSize == null) { + return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))}; + } + fullSize = fullSize.substring(0,fullSize.lastIndexOf(".")); + return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize}; // TODO Make this not make a newline if someone just types \n into the description. } catch (IOException ioe) { logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");