Fixed deviantArt description file name issue.

The deviantArt description file name will now match the image file name,
rather than the URL.
This commit is contained in:
Wiiplay123 2017-04-26 23:13:11 -05:00
parent 8da945a8fe
commit f2afb840dd
2 changed files with 51 additions and 29 deletions

View File

@ -53,7 +53,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
public boolean hasDescriptionSupport() { public boolean hasDescriptionSupport() {
return false; return false;
} }
public String getDescription(String page) throws IOException { public String[] getDescription(String url,Document page) throws IOException {
throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
} }
public int descSleepTime() { public int descSleepTime() {
@ -95,15 +95,16 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
logger.debug("Found description link(s) from " + doc.location()); logger.debug("Found description link(s) from " + doc.location());
for (String textURL : textURLs) { for (String textURL : textURLs) {
if (isStopped()) { if (isStopped()) {
break; break;
} }
textindex += 1; textindex += 1;
logger.debug("Getting description from " + textURL); logger.debug("Getting description from " + textURL);
sleep(descSleepTime()); sleep(descSleepTime());
String tempDesc = getDescription(textURL); String[] tempDesc = getDescription(textURL,doc);
if (tempDesc != null) { if (tempDesc != null) {
logger.debug("Got description: " + tempDesc); logger.debug("Got description from " + textURL);
saveText(new URL(textURL), "", tempDesc, textindex); saveText(new URL(textURL), "", tempDesc[0], textindex,tempDesc[1]);
} }
} }
} }
@ -130,18 +131,21 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
waitForThreads(); waitForThreads();
} }
public boolean saveText(URL url, String subdirectory, String text, int index) { public boolean saveText(URL url, String subdirectory, String text, int index) {
// Not the best for some cases, like FurAffinity. Overridden there.
try {
stopCheck();
} catch (IOException e) {
return false;
}
String saveAs = url.toExternalForm(); String saveAs = url.toExternalForm();
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); } if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
return saveText(url,subdirectory,text,index,saveAs);
}
public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
// Not the best for some cases, like FurAffinity. Overridden there.
try {
stopCheck();
} catch (IOException e) {
return false;
}
File saveFileAs; File saveFileAs;
try { try {
if (!subdirectory.equals("")) { // Not sure about this part if (!subdirectory.equals("")) { // Not sure about this part
@ -153,7 +157,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
+ subdirectory + subdirectory
+ File.separator + File.separator
+ getPrefix(index) + getPrefix(index)
+ saveAs + fileName
+ ".txt"); + ".txt");
// Write the file // Write the file
FileOutputStream out = (new FileOutputStream(saveFileAs)); FileOutputStream out = (new FileOutputStream(saveFileAs));

View File

@ -119,7 +119,19 @@ public class DeviantartRipper extends AbstractHTMLRipper {
.cookies(cookies) .cookies(cookies)
.get(); .get();
} }
/**
 * Looks up the full-size image URL for a deviation inside the inline
 * "window.__pageload" script of a gallery page.
 *
 * The script is scanned textually: starting at the "window.__pageload"
 * marker, the first occurrence of {@code id} is located, then the first
 * {@code },"src":"} marker after it, and the URL is taken up to the next
 * {@code ","type"} marker. Escaped slashes ({@code \/}) are unescaped.
 *
 * @param page The gallery page whose script tags are searched
 * @param id   The deviation id to look for (the text after the last '-' of the thumb href)
 * @return The image URL, or null if the page, id, or any of the expected markers is missing
 */
public String jsonToImage(Document page,String id) {
    // An empty id would match at the very start of the script and yield an unrelated image.
    if (page == null || id == null || id.isEmpty()) {
        return null;
    }
    final String pageloadMarker = "window.__pageload";
    final String srcMarker = "},\"src\":\"";
    final String typeMarker = "\",\"type\"";
    Elements js = page.select("script[type=\"text/javascript\"]");
    for (Element tag : js) {
        String script = tag.html();
        int pageloadPos = script.indexOf(pageloadMarker);
        if (pageloadPos < 0) {
            continue;
        }
        // Every lookup is checked: a missing marker means "not found", not an exception.
        int idPos = script.indexOf(id, pageloadPos);
        if (idPos < 0) {
            continue;
        }
        int srcPos = script.indexOf(srcMarker, idPos); // first },"src":"url" after id
        if (srcPos < 0) {
            continue;
        }
        int urlStart = srcPos + srcMarker.length();
        // Search for the terminator only after the URL start so the end can never precede it.
        int urlEnd = script.indexOf(typeMarker, urlStart);
        if (urlEnd < 0) {
            continue;
        }
        return script.substring(urlStart, urlEnd).replace("\\/","/");
    }
    return null;
}
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document page) {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<String>();
@ -139,18 +151,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
fullSize = thumb.attr("data-super-full-img"); fullSize = thumb.attr("data-super-full-img");
} else { } else {
String spanUrl = thumb.attr("href"); String spanUrl = thumb.attr("href");
// id = spanUrl.substring(spanUrl.lastIndexOf('-') + 1) fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
Elements js = page.select("script[type=\"text/javascript\"]");
for (Element tag : js) {
if (tag.html().contains("window.__pageload")) {
String script = tag.html();
script = script.substring(script.indexOf("window.__pageload"));
script = script.substring(script.indexOf(spanUrl.substring(spanUrl.lastIndexOf('-') + 1)));
script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id
fullSize = script.replace("\\/","/");
break;
}
}
if (fullSize == null) { if (fullSize == null) {
try { try {
fullSize = thumbToFull(img.attr("src"), true); fullSize = thumbToFull(img.attr("src"), true);
@ -263,17 +264,18 @@ public class DeviantartRipper extends AbstractHTMLRipper {
* Attempts to download description for image. * Attempts to download description for image.
* Comes in handy when people put entire stories in their description. * Comes in handy when people put entire stories in their description.
* If no description was found, returns null. * If no description was found, returns null.
* @param page The page the description will be retrieved from * @param url The URL the description will be retrieved from
* @return The description * @param page The gallery page the URL was found on
* @return A String[] whose first element is the description and whose second element, when present, is the image file name without its extension.
*/ */
@Override @Override
public String getDescription(String page) { public String[] getDescription(String url,Document page) {
if (isThisATest()) { if (isThisATest()) {
return null; return null;
} }
try { try {
// Fetch the image page // Fetch the image page
Response resp = Http.url(page) Response resp = Http.url(url)
.referrer(this.url) .referrer(this.url)
.cookies(cookies) .cookies(cookies)
.response(); .response();
@ -289,7 +291,23 @@ public class DeviantartRipper extends AbstractHTMLRipper {
documentz.outputSettings(new Document.OutputSettings().prettyPrint(false)); documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
ele.select("br").append("\\n"); ele.select("br").append("\\n");
ele.select("p").prepend("\\n\\n"); ele.select("p").prepend("\\n\\n");
return Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); String fullSize = null;
Element thumb = page.select("div.zones-container span.thumb[href=\"" + url + "\"]").get(0);
if (!thumb.attr("data-super-full-img").isEmpty()) {
fullSize = thumb.attr("data-super-full-img");
String[] split = fullSize.split("/");
fullSize = split[split.length - 1];
} else {
String spanUrl = thumb.attr("href");
fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
String[] split = fullSize.split("/");
fullSize = split[split.length - 1];
}
if (fullSize == null) {
return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))};
}
fullSize = fullSize.substring(0,fullSize.lastIndexOf("."));
return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize};
// TODO Make this not make a newline if someone just types \n into the description. // TODO Make this not make a newline if someone just types \n into the description.
} catch (IOException ioe) { } catch (IOException ioe) {
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'"); logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");