Fixed DeviantArt ripper downloading a low-resolution image when a higher-resolution version is available
Also fixed a few bugs related to non-image items that crashed the rip.
This commit is contained in:
parent
d55cf06c90
commit
8f409eb28a
@ -57,7 +57,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
|
throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
|
||||||
}
|
}
|
||||||
public int descSleepTime() {
|
public int descSleepTime() {
|
||||||
return 0;
|
return 100;
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public void rip() throws IOException {
|
public void rip() throws IOException {
|
||||||
@ -95,18 +95,28 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
logger.debug("Found description link(s) from " + doc.location());
|
logger.debug("Found description link(s) from " + doc.location());
|
||||||
for (String textURL : textURLs) {
|
for (String textURL : textURLs) {
|
||||||
if (isStopped()) {
|
if (isStopped()) {
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
textindex += 1;
|
textindex += 1;
|
||||||
logger.debug("Getting description from " + textURL);
|
logger.debug("Getting description from " + textURL);
|
||||||
sleep(descSleepTime());
|
|
||||||
String[] tempDesc = getDescription(textURL,doc);
|
String[] tempDesc = getDescription(textURL,doc);
|
||||||
if (tempDesc != null) {
|
if (tempDesc != null) {
|
||||||
|
if (Utils.getConfigBoolean("file.overwrite", false) || !(new File(
|
||||||
|
workingDir.getCanonicalPath()
|
||||||
|
+ ""
|
||||||
|
+ File.separator
|
||||||
|
+ getPrefix(index)
|
||||||
|
+ (tempDesc.length > 1 ? tempDesc[1] : fileNameFromURL(new URL(textURL)))
|
||||||
|
+ ".txt").exists())) {
|
||||||
logger.debug("Got description from " + textURL);
|
logger.debug("Got description from " + textURL);
|
||||||
saveText(new URL(textURL), "", tempDesc[0], textindex,tempDesc[1]);
|
saveText(new URL(textURL), "", tempDesc[0], textindex, (tempDesc.length > 1 ? tempDesc[1] : fileNameFromURL(new URL(textURL))));
|
||||||
|
sleep(descSleepTime());
|
||||||
|
} else {
|
||||||
|
logger.debug("Description from " + textURL + " already exists.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -130,13 +140,17 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
waitForThreads();
|
waitForThreads();
|
||||||
}
|
}
|
||||||
public boolean saveText(URL url, String subdirectory, String text, int index) {
|
public String fileNameFromURL(URL url) {
|
||||||
String saveAs = url.toExternalForm();
|
String saveAs = url.toExternalForm();
|
||||||
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
|
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
|
||||||
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
|
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
|
||||||
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
|
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
|
||||||
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
|
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
|
||||||
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
|
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
|
||||||
|
return saveAs;
|
||||||
|
}
|
||||||
|
public boolean saveText(URL url, String subdirectory, String text, int index) {
|
||||||
|
String saveAs = fileNameFromURL(url);
|
||||||
return saveText(url,subdirectory,text,index,saveAs);
|
return saveText(url,subdirectory,text,index,saveAs);
|
||||||
}
|
}
|
||||||
public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
|
public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package com.rarchives.ripme.ripper.rippers;
|
package com.rarchives.ripme.ripper.rippers;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.HttpURLConnection;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -114,6 +115,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
cookies = loginToDeviantart();
|
cookies = loginToDeviantart();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.warn("Failed to login: ", e);
|
logger.warn("Failed to login: ", e);
|
||||||
|
cookies.put("agegate_state","1"); // Bypasses the age gate
|
||||||
}
|
}
|
||||||
return Http.url(this.url)
|
return Http.url(this.url)
|
||||||
.cookies(cookies)
|
.cookies(cookies)
|
||||||
@ -123,11 +125,18 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
Elements js = page.select("script[type=\"text/javascript\"]");
|
Elements js = page.select("script[type=\"text/javascript\"]");
|
||||||
for (Element tag : js) {
|
for (Element tag : js) {
|
||||||
if (tag.html().contains("window.__pageload")) {
|
if (tag.html().contains("window.__pageload")) {
|
||||||
|
try {
|
||||||
String script = tag.html();
|
String script = tag.html();
|
||||||
script = script.substring(script.indexOf("window.__pageload"));
|
script = script.substring(script.indexOf("window.__pageload"));
|
||||||
|
if (script.indexOf(id) < 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
script = script.substring(script.indexOf(id));
|
script = script.substring(script.indexOf(id));
|
||||||
script = script.substring(script.indexOf("},\"src\":\"") + 9, script.indexOf("\",\"type\"")); // first },"src":"url" after id
|
script = script.substring(script.indexOf("},\"src\":\"") + 9, script.indexOf("\",\"type\"")); // first },"src":"url" after id
|
||||||
return script.replace("\\/", "/");
|
return script.replace("\\/", "/");
|
||||||
|
} catch (StringIndexOutOfBoundsException e) {
|
||||||
|
logger.debug("Unable to get json link from " + page.location());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
@ -147,23 +156,27 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
// Get full-sized image via helper methods
|
// Get full-sized image via helper methods
|
||||||
String fullSize = null;
|
String fullSize = null;
|
||||||
if (!thumb.attr("data-super-full-img").isEmpty()) {
|
if (thumb.attr("data-super-full-img").contains("//orig")) {
|
||||||
fullSize = thumb.attr("data-super-full-img");
|
fullSize = thumb.attr("data-super-full-img");
|
||||||
} else {
|
} else {
|
||||||
String spanUrl = thumb.attr("href");
|
String spanUrl = thumb.attr("href");
|
||||||
fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
|
String fullSize1 = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
|
||||||
if (fullSize == null) {
|
if (fullSize1 == null || !fullSize1.contains("//orig")) {
|
||||||
try {
|
fullSize = smallToFull(img.attr("src"), spanUrl);
|
||||||
fullSize = thumbToFull(img.attr("src"), true);
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.info("Attempting to get full size image from " + thumb.attr("href"));
|
|
||||||
fullSize = smallToFull(img.attr("src"), thumb.attr("href"));
|
|
||||||
}
|
}
|
||||||
|
if (fullSize == null && fullSize1 != null) {
|
||||||
|
fullSize = fullSize1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (fullSize == null) {
|
if (fullSize == null) {
|
||||||
|
if (thumb.attr("data-super-full-img") != null) {
|
||||||
|
fullSize = thumb.attr("data-super-full-img");
|
||||||
|
} else if (thumb.attr("data-super-img") != null) {
|
||||||
|
fullSize = thumb.attr("data-super-img");
|
||||||
|
} else {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (triedURLs.contains(fullSize)) {
|
if (triedURLs.contains(fullSize)) {
|
||||||
logger.warn("Already tried to download " + fullSize);
|
logger.warn("Already tried to download " + fullSize);
|
||||||
continue;
|
continue;
|
||||||
@ -283,12 +296,11 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
cookies.putAll(resp.cookies());
|
cookies.putAll(resp.cookies());
|
||||||
|
|
||||||
// Try to find the description
|
// Try to find the description
|
||||||
Elements els = resp.parse().select("div[class=dev-description]");
|
Document documentz = resp.parse();
|
||||||
if (els.size() == 0) {
|
Element ele = documentz.select("div.dev-description").first();
|
||||||
|
if (ele == null) {
|
||||||
throw new IOException("No description found");
|
throw new IOException("No description found");
|
||||||
}
|
}
|
||||||
Document documentz = resp.parse();
|
|
||||||
Element ele = documentz.select("div[class=dev-description]").get(0);
|
|
||||||
documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
|
documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
|
||||||
ele.select("br").append("\\n");
|
ele.select("br").append("\\n");
|
||||||
ele.select("p").prepend("\\n\\n");
|
ele.select("p").prepend("\\n\\n");
|
||||||
@ -301,9 +313,11 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
} else {
|
} else {
|
||||||
String spanUrl = thumb.attr("href");
|
String spanUrl = thumb.attr("href");
|
||||||
fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
|
fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
|
||||||
|
if (fullSize != null) {
|
||||||
String[] split = fullSize.split("/");
|
String[] split = fullSize.split("/");
|
||||||
fullSize = split[split.length - 1];
|
fullSize = split[split.length - 1];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (fullSize == null) {
|
if (fullSize == null) {
|
||||||
return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))};
|
return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))};
|
||||||
}
|
}
|
||||||
@ -311,7 +325,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize};
|
return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize};
|
||||||
// TODO Make this not make a newline if someone just types \n into the description.
|
// TODO Make this not make a newline if someone just types \n into the description.
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
|
logger.info("Failed to get description at " + url + ": '" + ioe.getMessage() + "'");
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -332,23 +346,43 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
.cookies(cookies)
|
.cookies(cookies)
|
||||||
.response();
|
.response();
|
||||||
cookies.putAll(resp.cookies());
|
cookies.putAll(resp.cookies());
|
||||||
|
|
||||||
// Try to find the download button
|
|
||||||
Document doc = resp.parse();
|
Document doc = resp.parse();
|
||||||
Elements els = doc.select("a.dev-page-download");
|
Elements els = doc.select("img.dev-content-full");
|
||||||
if (els.size() > 0) {
|
String fsimage = null;
|
||||||
// Full-size image
|
|
||||||
String fsimage = els.get(0).attr("href");
|
|
||||||
logger.info("Found download page: " + fsimage);
|
|
||||||
return fsimage;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the largest resolution image on the page
|
// Get the largest resolution image on the page
|
||||||
els = doc.select("img.dev-content-full");
|
|
||||||
if (els.size() > 0) {
|
if (els.size() > 0) {
|
||||||
// Large image
|
// Large image
|
||||||
String fsimage = els.get(0).attr("src");
|
fsimage = els.get(0).attr("src");
|
||||||
logger.info("Found large-scale: " + fsimage);
|
logger.info("Found large-scale: " + fsimage);
|
||||||
|
if (fsimage.contains("//orig")) {
|
||||||
|
return fsimage;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Try to find the download button
|
||||||
|
els = doc.select("a.dev-page-download");
|
||||||
|
if (els.size() > 0) {
|
||||||
|
// Full-size image
|
||||||
|
String downloadLink = els.get(0).attr("href");
|
||||||
|
logger.info("Found download page: " + downloadLink);
|
||||||
|
HttpURLConnection con = (HttpURLConnection) new URL(downloadLink).openConnection();
|
||||||
|
con.setRequestProperty("Referer",this.url.toString());
|
||||||
|
String cookieString = "";
|
||||||
|
for (Map.Entry<String, String> entry : cookies.entrySet()) {
|
||||||
|
cookieString = cookieString + entry.getKey() + "=" + entry.getValue() + "; ";
|
||||||
|
}
|
||||||
|
cookieString = cookieString.substring(0,cookieString.length() - 1);
|
||||||
|
con.setRequestProperty("Cookie",cookieString);
|
||||||
|
con.setRequestProperty("User-Agent",this.USER_AGENT);
|
||||||
|
con.setInstanceFollowRedirects(true);
|
||||||
|
con.connect();
|
||||||
|
int code = con.getResponseCode();
|
||||||
|
String location = con.getHeaderField("Location");
|
||||||
|
con.disconnect();
|
||||||
|
if (location.contains("//orig")) {
|
||||||
|
fsimage = location;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fsimage != null) {
|
||||||
return fsimage;
|
return fsimage;
|
||||||
}
|
}
|
||||||
throw new IOException("No download page found");
|
throw new IOException("No download page found");
|
||||||
|
Loading…
Reference in New Issue
Block a user