From c3d1c7b6541ddaca689334ff3b2d0719e27f473d Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Tue, 25 Apr 2017 20:17:26 -0500 Subject: [PATCH 1/8] Fixed deviantArt ripping Uses the data-super-full-img attribute of thumbnails as first attempt to get a full image URL. If that doesn't work (as is the case with mature items), the JSON is used. thumbToFull is still broken in this commit, but shouldn't be needed. --- .../ripper/rippers/DeviantartRipper.java | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index b170c2e7..7dcd2747 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -125,7 +125,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { List imageURLs = new ArrayList(); // Iterate over all thumbnails - for (Element thumb : page.select("div.zones-container a.thumb")) { + for (Element thumb : page.select("div.zones-container span.thumb")) { if (isStopped()) { break; } @@ -133,15 +133,33 @@ public class DeviantartRipper extends AbstractHTMLRipper { if (img.attr("transparent").equals("false")) { continue; // a.thumbs to other albums are invisible } - // Get full-sized image via helper methods String fullSize = null; - try { - fullSize = thumbToFull(img.attr("src"), true); - } catch (Exception e) { - logger.info("Attempting to get full size image from " + thumb.attr("href")); - fullSize = smallToFull(img.attr("src"), thumb.attr("href")); - } + if (!thumb.attr("data-super-full-img").isEmpty()) { + fullSize = thumb.attr("data-super-full-img"); + } else { + String spanUrl = thumb.attr("href"); + // id = spanUrl.substring(spanUrl.lastIndexOf('-') + 1) + Elements js = page.select("script[type=\"text/javascript\"]"); + for (Element tag : js) { + if 
(tag.html().contains("window.__pageload")) { + String script = tag.html(); + script = script.substring(script.indexOf("window.__pageload")); + script = script.substring(script.indexOf(spanUrl.substring(spanUrl.lastIndexOf('-') + 1))); + script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id + fullSize = script.replace("\\/","/"); + break; + } + } + if (fullSize == null) { + try { + fullSize = thumbToFull(img.attr("src"), true); + } catch (Exception e) { + logger.info("Attempting to get full size image from " + thumb.attr("href")); + fullSize = smallToFull(img.attr("src"), thumb.attr("href")); + } + } + } if (fullSize == null) { continue; } From 8da945a8fe555c8e9c4c584e2a943d77177976d1 Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Tue, 25 Apr 2017 21:18:03 -0500 Subject: [PATCH 2/8] Fixed deviantArt description ripping again Forgot to change an "a" to a "span", fixed now. --- .../com/rarchives/ripme/ripper/rippers/DeviantartRipper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 7dcd2747..994efa7a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -180,9 +180,9 @@ public class DeviantartRipper extends AbstractHTMLRipper { @Override public List getDescriptionsFromPage(Document page) { List textURLs = new ArrayList(); - // Iterate over all thumbnails - for (Element thumb : page.select("div.zones-container a.thumb")) { + for (Element thumb : page.select("div.zones-container span.thumb")) { + logger.info(thumb.attr("href")); if (isStopped()) { break; } @@ -191,6 +191,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { continue; // a.thumbs to other albums are invisible } 
textURLs.add(thumb.attr("href")); + } return textURLs; } From f2afb840ddd56281782f56f9cd1e21a0ff2e1a7a Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Wed, 26 Apr 2017 23:13:11 -0500 Subject: [PATCH 3/8] Fixed deviantArt description file name issue. The deviantArt description file name will now match the image file name, rather than the URL. --- .../ripme/ripper/AbstractHTMLRipper.java | 26 +++++---- .../ripper/rippers/DeviantartRipper.java | 54 ++++++++++++------- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 35b2aa43..6111db79 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -53,7 +53,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { public boolean hasDescriptionSupport() { return false; } - public String getDescription(String page) throws IOException { + public String[] getDescription(String url,Document page) throws IOException { throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? 
} public int descSleepTime() { @@ -95,15 +95,16 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { logger.debug("Found description link(s) from " + doc.location()); for (String textURL : textURLs) { if (isStopped()) { + break; } textindex += 1; logger.debug("Getting description from " + textURL); sleep(descSleepTime()); - String tempDesc = getDescription(textURL); + String[] tempDesc = getDescription(textURL,doc); if (tempDesc != null) { - logger.debug("Got description: " + tempDesc); - saveText(new URL(textURL), "", tempDesc, textindex); + logger.debug("Got description from " + textURL); + saveText(new URL(textURL), "", tempDesc[0], textindex,tempDesc[1]); } } } @@ -130,18 +131,21 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { waitForThreads(); } public boolean saveText(URL url, String subdirectory, String text, int index) { - // Not the best for some cases, like FurAffinity. Overridden there. - try { - stopCheck(); - } catch (IOException e) { - return false; - } String saveAs = url.toExternalForm(); saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); } + return saveText(url,subdirectory,text,index,saveAs); + } + public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) { + // Not the best for some cases, like FurAffinity. Overridden there. 
+ try { + stopCheck(); + } catch (IOException e) { + return false; + } File saveFileAs; try { if (!subdirectory.equals("")) { // Not sure about this part @@ -153,7 +157,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { + subdirectory + File.separator + getPrefix(index) - + saveAs + + fileName + ".txt"); // Write the file FileOutputStream out = (new FileOutputStream(saveFileAs)); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 994efa7a..b717437f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -119,7 +119,19 @@ public class DeviantartRipper extends AbstractHTMLRipper { .cookies(cookies) .get(); } - + public String jsonToImage(Document page,String id) { + Elements js = page.select("script[type=\"text/javascript\"]"); + for (Element tag : js) { + if (tag.html().contains("window.__pageload")) { + String script = tag.html(); + script = script.substring(script.indexOf("window.__pageload")); + script = script.substring(script.indexOf(id)); + script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id + return script.replace("\\/","/"); + } + } + return null; + } @Override public List getURLsFromPage(Document page) { List imageURLs = new ArrayList(); @@ -139,18 +151,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { fullSize = thumb.attr("data-super-full-img"); } else { String spanUrl = thumb.attr("href"); - // id = spanUrl.substring(spanUrl.lastIndexOf('-') + 1) - Elements js = page.select("script[type=\"text/javascript\"]"); - for (Element tag : js) { - if (tag.html().contains("window.__pageload")) { - String script = tag.html(); - script = script.substring(script.indexOf("window.__pageload")); - script = 
script.substring(script.indexOf(spanUrl.substring(spanUrl.lastIndexOf('-') + 1))); - script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id - fullSize = script.replace("\\/","/"); - break; - } - } + fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); if (fullSize == null) { try { fullSize = thumbToFull(img.attr("src"), true); @@ -263,17 +264,18 @@ public class DeviantartRipper extends AbstractHTMLRipper { * Attempts to download description for image. * Comes in handy when people put entire stories in their description. * If no description was found, returns null. - * @param page The page the description will be retrieved from - * @return The description + * @param url The URL the description will be retrieved from + * @param page The gallery page the URL was found on + * @return A String[] with first object being the description, and the second object being image file name if found. */ @Override - public String getDescription(String page) { + public String[] getDescription(String url,Document page) { if (isThisATest()) { return null; } try { // Fetch the image page - Response resp = Http.url(page) + Response resp = Http.url(url) .referrer(this.url) .cookies(cookies) .response(); @@ -289,7 +291,23 @@ public class DeviantartRipper extends AbstractHTMLRipper { documentz.outputSettings(new Document.OutputSettings().prettyPrint(false)); ele.select("br").append("\\n"); ele.select("p").prepend("\\n\\n"); - return Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); + String fullSize = null; + Element thumb = page.select("div.zones-container span.thumb[href=\"" + url + "\"]").get(0); + if (!thumb.attr("data-super-full-img").isEmpty()) { + fullSize = thumb.attr("data-super-full-img"); + String[] split = fullSize.split("/"); + fullSize = split[split.length - 1]; + } else { + 
String spanUrl = thumb.attr("href"); + fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); + String[] split = fullSize.split("/"); + fullSize = split[split.length - 1]; + } + if (fullSize == null) { + return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))}; + } + fullSize = fullSize.substring(0,fullSize.lastIndexOf(".")); + return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize}; // TODO Make this not make a newline if someone just types \n into the description. } catch (IOException ioe) { logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'"); From 4cf9f09293390d4ce2aca4cc6c78f1201a10b8a7 Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Thu, 27 Apr 2017 00:18:42 -0500 Subject: [PATCH 4/8] Fixed deviantArt "no next page found" error The pagination buttons are gone. The "coffset" buttons are for comment pages, not gallery pages. I patched this by using a link found in the HTML itself that I think is used by JavaScript to generate the pagination buttons. 
--- .../ripme/ripper/rippers/DeviantartRipper.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index b717437f..cde05d9f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -201,14 +201,15 @@ public class DeviantartRipper extends AbstractHTMLRipper { if (isThisATest()) { return null; } - Elements nextButtons = page.select("li.next > a"); + Elements nextButtons = page.select("link[rel=\"next\"]"); if (nextButtons.size() == 0) { - throw new IOException("No next page found"); + if (page.select("link[rel=\"prev\"]").size() == 0) { + throw new IOException("No next page found"); + } else { + throw new IOException("Hit end of pages"); + } } Element a = nextButtons.first(); - if (a.hasClass("disabled")) { - throw new IOException("Hit end of pages"); - } String nextPage = a.attr("href"); if (nextPage.startsWith("/")) { nextPage = "http://" + this.url.getHost() + nextPage; @@ -306,7 +307,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { if (fullSize == null) { return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))}; } - fullSize = fullSize.substring(0,fullSize.lastIndexOf(".")); + fullSize = fullSize.substring(0, fullSize.lastIndexOf(".")); return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize}; // TODO Make this not make a newline if someone just types \n into the description. 
} catch (IOException ioe) { From 8f409eb28ac5e270dd76eeee88db8d9613857172 Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Sat, 29 Apr 2017 14:35:39 -0500 Subject: [PATCH 5/8] Fixed deviantArt downloading low res when higher res is available Also fixed a few bugs related to non-image items that crashed the rip. --- .../ripme/ripper/AbstractHTMLRipper.java | 32 ++++-- .../ripper/rippers/DeviantartRipper.java | 102 ++++++++++++------ 2 files changed, 91 insertions(+), 43 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 6111db79..99ce1fec 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -57,7 +57,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? } public int descSleepTime() { - return 0; + return 100; } @Override public void rip() throws IOException { @@ -95,17 +95,27 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { logger.debug("Found description link(s) from " + doc.location()); for (String textURL : textURLs) { if (isStopped()) { - break; } textindex += 1; logger.debug("Getting description from " + textURL); - sleep(descSleepTime()); - String[] tempDesc = getDescription(textURL,doc); - if (tempDesc != null) { - logger.debug("Got description from " + textURL); - saveText(new URL(textURL), "", tempDesc[0], textindex,tempDesc[1]); - } + String[] tempDesc = getDescription(textURL,doc); + if (tempDesc != null) { + if (Utils.getConfigBoolean("file.overwrite", false) || !(new File( + workingDir.getCanonicalPath() + + "" + + File.separator + + getPrefix(index) + + (tempDesc.length > 1 ? 
tempDesc[1] : fileNameFromURL(new URL(textURL))) + + ".txt").exists())) { + logger.debug("Got description from " + textURL); + saveText(new URL(textURL), "", tempDesc[0], textindex, (tempDesc.length > 1 ? tempDesc[1] : fileNameFromURL(new URL(textURL)))); + sleep(descSleepTime()); + } else { + logger.debug("Description from " + textURL + " already exists."); + } + } + } } } @@ -130,13 +140,17 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { } waitForThreads(); } - public boolean saveText(URL url, String subdirectory, String text, int index) { + public String fileNameFromURL(URL url) { String saveAs = url.toExternalForm(); saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); } + return saveAs; + } + public boolean saveText(URL url, String subdirectory, String text, int index) { + String saveAs = fileNameFromURL(url); return saveText(url,subdirectory,text,index,saveAs); } public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index cde05d9f..24816992 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -1,6 +1,7 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; +import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -114,6 +115,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { cookies = 
loginToDeviantart(); } catch (Exception e) { logger.warn("Failed to login: ", e); + cookies.put("agegate_state","1"); // Bypasses the age gate } return Http.url(this.url) .cookies(cookies) @@ -123,11 +125,18 @@ public class DeviantartRipper extends AbstractHTMLRipper { Elements js = page.select("script[type=\"text/javascript\"]"); for (Element tag : js) { if (tag.html().contains("window.__pageload")) { - String script = tag.html(); - script = script.substring(script.indexOf("window.__pageload")); - script = script.substring(script.indexOf(id)); - script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id - return script.replace("\\/","/"); + try { + String script = tag.html(); + script = script.substring(script.indexOf("window.__pageload")); + if (script.indexOf(id) < 0) { + continue; + } + script = script.substring(script.indexOf(id)); + script = script.substring(script.indexOf("},\"src\":\"") + 9, script.indexOf("\",\"type\"")); // first },"src":"url" after id + return script.replace("\\/", "/"); + } catch (StringIndexOutOfBoundsException e) { + logger.debug("Unable to get json link from " + page.location()); + } } } return null; @@ -147,22 +156,26 @@ public class DeviantartRipper extends AbstractHTMLRipper { } // Get full-sized image via helper methods String fullSize = null; - if (!thumb.attr("data-super-full-img").isEmpty()) { + if (thumb.attr("data-super-full-img").contains("//orig")) { fullSize = thumb.attr("data-super-full-img"); } else { String spanUrl = thumb.attr("href"); - fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); - if (fullSize == null) { - try { - fullSize = thumbToFull(img.attr("src"), true); - } catch (Exception e) { - logger.info("Attempting to get full size image from " + thumb.attr("href")); - fullSize = smallToFull(img.attr("src"), thumb.attr("href")); - } + String fullSize1 = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); + 
if (fullSize1 == null || !fullSize1.contains("//orig")) { + fullSize = smallToFull(img.attr("src"), spanUrl); } + if (fullSize == null && fullSize1 != null) { + fullSize = fullSize1; + } } if (fullSize == null) { - continue; + if (thumb.attr("data-super-full-img") != null) { + fullSize = thumb.attr("data-super-full-img"); + } else if (thumb.attr("data-super-img") != null) { + fullSize = thumb.attr("data-super-img"); + } else { + continue; + } } if (triedURLs.contains(fullSize)) { logger.warn("Already tried to download " + fullSize); @@ -283,12 +296,11 @@ public class DeviantartRipper extends AbstractHTMLRipper { cookies.putAll(resp.cookies()); // Try to find the description - Elements els = resp.parse().select("div[class=dev-description]"); - if (els.size() == 0) { + Document documentz = resp.parse(); + Element ele = documentz.select("div.dev-description").first(); + if (ele == null) { throw new IOException("No description found"); } - Document documentz = resp.parse(); - Element ele = documentz.select("div[class=dev-description]").get(0); documentz.outputSettings(new Document.OutputSettings().prettyPrint(false)); ele.select("br").append("\\n"); ele.select("p").prepend("\\n\\n"); @@ -301,8 +313,10 @@ public class DeviantartRipper extends AbstractHTMLRipper { } else { String spanUrl = thumb.attr("href"); fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); - String[] split = fullSize.split("/"); - fullSize = split[split.length - 1]; + if (fullSize != null) { + String[] split = fullSize.split("/"); + fullSize = split[split.length - 1]; + } } if (fullSize == null) { return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))}; @@ -311,7 +325,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new 
Document.OutputSettings().prettyPrint(false)),fullSize}; // TODO Make this not make a newline if someone just types \n into the description. } catch (IOException ioe) { - logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'"); + logger.info("Failed to get description at " + url + ": '" + ioe.getMessage() + "'"); return null; } } @@ -332,23 +346,43 @@ public class DeviantartRipper extends AbstractHTMLRipper { .cookies(cookies) .response(); cookies.putAll(resp.cookies()); - - // Try to find the download button Document doc = resp.parse(); - Elements els = doc.select("a.dev-page-download"); - if (els.size() > 0) { - // Full-size image - String fsimage = els.get(0).attr("href"); - logger.info("Found download page: " + fsimage); - return fsimage; - } - + Elements els = doc.select("img.dev-content-full"); + String fsimage = null; // Get the largest resolution image on the page - els = doc.select("img.dev-content-full"); if (els.size() > 0) { // Large image - String fsimage = els.get(0).attr("src"); + fsimage = els.get(0).attr("src"); logger.info("Found large-scale: " + fsimage); + if (fsimage.contains("//orig")) { + return fsimage; + } + } + // Try to find the download button + els = doc.select("a.dev-page-download"); + if (els.size() > 0) { + // Full-size image + String downloadLink = els.get(0).attr("href"); + logger.info("Found download page: " + downloadLink); + HttpURLConnection con = (HttpURLConnection) new URL(downloadLink).openConnection(); + con.setRequestProperty("Referer",this.url.toString()); + String cookieString = ""; + for (Map.Entry entry : cookies.entrySet()) { + cookieString = cookieString + entry.getKey() + "=" + entry.getValue() + "; "; + } + cookieString = cookieString.substring(0,cookieString.length() - 1); + con.setRequestProperty("Cookie",cookieString); + con.setRequestProperty("User-Agent",this.USER_AGENT); + con.setInstanceFollowRedirects(true); + con.connect(); + int code = con.getResponseCode(); + String location = 
con.getHeaderField("Location"); + con.disconnect(); + if (location.contains("//orig")) { + fsimage = location; + } + } + if (fsimage != null) { return fsimage; } throw new IOException("No download page found"); From 624c28befa811b2c205f8ed31158f082758a2d2e Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Sat, 29 Apr 2017 21:07:49 -0500 Subject: [PATCH 6/8] Fixed deviantArt download link resolving Turns out getURL works perfectly fine, but the Location header doesn't. --- .../com/rarchives/ripme/ripper/rippers/DeviantartRipper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 24816992..5bb0fae3 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -363,7 +363,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { if (els.size() > 0) { // Full-size image String downloadLink = els.get(0).attr("href"); - logger.info("Found download page: " + downloadLink); + logger.info("Found download button link: " + downloadLink); HttpURLConnection con = (HttpURLConnection) new URL(downloadLink).openConnection(); con.setRequestProperty("Referer",this.url.toString()); String cookieString = ""; @@ -376,10 +376,11 @@ public class DeviantartRipper extends AbstractHTMLRipper { con.setInstanceFollowRedirects(true); con.connect(); int code = con.getResponseCode(); - String location = con.getHeaderField("Location"); + String location = con.getURL().toString(); con.disconnect(); if (location.contains("//orig")) { fsimage = location; + logger.info("Found image download: " + location); } } if (fsimage != null) { From 3d359be95866720bb9133dd62abd6d2935671d41 Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Sun, 14 May 2017 20:15:27 -0500 Subject: [PATCH 7/8] Added precaution for URLs with slashes at the end 
when determining file name Made for my FurAffinity branch, brought it over to master branch because it could come in handy for other things. --- src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 99ce1fec..300e8a64 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -142,6 +142,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { } public String fileNameFromURL(URL url) { String saveAs = url.toExternalForm(); + if (saveAs.substring(saveAs.length() - 1) == "/") { saveAs = saveAs.substring(0,saveAs.length() - 1) ;} saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } From 2055cb0d9d9449e323e29a0df4737f811f1257c4 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Mon, 15 May 2017 10:24:36 -0700 Subject: [PATCH 8/8] Fix indentation. --- .../ripme/ripper/AbstractHTMLRipper.java | 36 +++++++++---------- .../ripper/rippers/DeviantartRipper.java | 33 ++++++++--------- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 300e8a64..cdab1664 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -16,7 +16,7 @@ import com.rarchives.ripme.utils.Utils; * Simplified ripper, designed for ripping from sites by parsing HTML. 
*/ public abstract class AbstractHTMLRipper extends AlbumRipper { - + public AbstractHTMLRipper(URL url) throws IOException { super(url); } @@ -30,7 +30,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { } public abstract List getURLsFromPage(Document page); public List getDescriptionsFromPage(Document doc) throws IOException { - throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function? + throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function? } public abstract void downloadURL(URL url, int index); public DownloadThreadPool getThreadPool() { @@ -45,16 +45,16 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { public boolean canRip(URL url) { return url.getHost().endsWith(getDomain()); } - + @Override public URL sanitizeURL(URL url) throws MalformedURLException { return url; } public boolean hasDescriptionSupport() { - return false; + return false; } public String[] getDescription(String url,Document page) throws IOException { - throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? + throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? 
} public int descSleepTime() { return 100; @@ -66,7 +66,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { logger.info("Retrieving " + this.url); sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); Document doc = getFirstPage(); - + while (doc != null) { List imageURLs = getURLsFromPage(doc); // Remove all but 1 image @@ -79,7 +79,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { if (imageURLs.size() == 0) { throw new IOException("No images found at " + doc.location()); } - + for (String imageURL : imageURLs) { index += 1; logger.debug("Found image url #" + index + ": " + imageURL); @@ -90,15 +90,15 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { } if (hasDescriptionSupport() && Utils.getConfigBoolean("descriptions.save", false)) { logger.debug("Fetching description(s) from " + doc.location()); - List textURLs = getDescriptionsFromPage(doc); - if (textURLs.size() > 0) { + List textURLs = getDescriptionsFromPage(doc); + if (textURLs.size() > 0) { logger.debug("Found description link(s) from " + doc.location()); - for (String textURL : textURLs) { - if (isStopped()) { - break; - } - textindex += 1; - logger.debug("Getting description from " + textURL); + for (String textURL : textURLs) { + if (isStopped()) { + break; + } + textindex += 1; + logger.debug("Getting description from " + textURL); String[] tempDesc = getDescription(textURL,doc); if (tempDesc != null) { if (Utils.getConfigBoolean("file.overwrite", false) || !(new File( @@ -116,8 +116,8 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { } } - } - } + } + } } if (isStopped() || isThisATest()) { @@ -142,7 +142,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { } public String fileNameFromURL(URL url) { String saveAs = url.toExternalForm(); - if (saveAs.substring(saveAs.length() - 1) == "/") { saveAs = saveAs.substring(0,saveAs.length() - 1) ;} + if (saveAs.substring(saveAs.length() - 1) == "/") { saveAs = 
saveAs.substring(0,saveAs.length() - 1) ;} saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 5bb0fae3..e61cb007 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -49,7 +49,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { } @Override public boolean hasDescriptionSupport() { - return true; + return true; } @Override public URL sanitizeURL(URL url) throws MalformedURLException { @@ -132,7 +132,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { continue; } script = script.substring(script.indexOf(id)); - script = script.substring(script.indexOf("},\"src\":\"") + 9, script.indexOf("\",\"type\"")); // first },"src":"url" after id + // first },"src":"url" after id + script = script.substring(script.indexOf("},\"src\":\"") + 9, script.indexOf("\",\"type\"")); return script.replace("\\/", "/"); } catch (StringIndexOutOfBoundsException e) { logger.debug("Unable to get json link from " + page.location()); @@ -156,23 +157,23 @@ public class DeviantartRipper extends AbstractHTMLRipper { } // Get full-sized image via helper methods String fullSize = null; - if (thumb.attr("data-super-full-img").contains("//orig")) { - fullSize = thumb.attr("data-super-full-img"); - } else { - String spanUrl = thumb.attr("href"); - String fullSize1 = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); - if (fullSize1 == null || !fullSize1.contains("//orig")) { + if (thumb.attr("data-super-full-img").contains("//orig")) { + fullSize = thumb.attr("data-super-full-img"); + } else { + String spanUrl = 
thumb.attr("href"); + String fullSize1 = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); + if (fullSize1 == null || !fullSize1.contains("//orig")) { fullSize = smallToFull(img.attr("src"), spanUrl); - } - if (fullSize == null && fullSize1 != null) { + } + if (fullSize == null && fullSize1 != null) { fullSize = fullSize1; } - } + } if (fullSize == null) { - if (thumb.attr("data-super-full-img") != null) { - fullSize = thumb.attr("data-super-full-img"); + if (thumb.attr("data-super-full-img") != null) { + fullSize = thumb.attr("data-super-full-img"); } else if (thumb.attr("data-super-img") != null) { - fullSize = thumb.attr("data-super-img"); + fullSize = thumb.attr("data-super-img"); } else { continue; } @@ -273,7 +274,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { } return result.toString(); } - + /** * Attempts to download description for image. * Comes in handy when people put entire stories in their description. @@ -329,7 +330,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { return null; } } - + /** * If largest resolution for image at 'thumb' is found, starts downloading * and returns null.