Fixed deviantArt description file name issue.
The deviantArt description file name will now match the image file name, rather than the URL.
This commit is contained in:
parent
8da945a8fe
commit
f2afb840dd
@ -53,7 +53,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
public boolean hasDescriptionSupport() {
|
public boolean hasDescriptionSupport() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
public String getDescription(String page) throws IOException {
|
public String[] getDescription(String url,Document page) throws IOException {
|
||||||
throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
|
throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
|
||||||
}
|
}
|
||||||
public int descSleepTime() {
|
public int descSleepTime() {
|
||||||
@ -95,15 +95,16 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
logger.debug("Found description link(s) from " + doc.location());
|
logger.debug("Found description link(s) from " + doc.location());
|
||||||
for (String textURL : textURLs) {
|
for (String textURL : textURLs) {
|
||||||
if (isStopped()) {
|
if (isStopped()) {
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
textindex += 1;
|
textindex += 1;
|
||||||
logger.debug("Getting description from " + textURL);
|
logger.debug("Getting description from " + textURL);
|
||||||
sleep(descSleepTime());
|
sleep(descSleepTime());
|
||||||
String tempDesc = getDescription(textURL);
|
String[] tempDesc = getDescription(textURL,doc);
|
||||||
if (tempDesc != null) {
|
if (tempDesc != null) {
|
||||||
logger.debug("Got description: " + tempDesc);
|
logger.debug("Got description from " + textURL);
|
||||||
saveText(new URL(textURL), "", tempDesc, textindex);
|
saveText(new URL(textURL), "", tempDesc[0], textindex,tempDesc[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -130,18 +131,21 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
waitForThreads();
|
waitForThreads();
|
||||||
}
|
}
|
||||||
public boolean saveText(URL url, String subdirectory, String text, int index) {
|
public boolean saveText(URL url, String subdirectory, String text, int index) {
|
||||||
// Not the best for some cases, like FurAffinity. Overridden there.
|
|
||||||
try {
|
|
||||||
stopCheck();
|
|
||||||
} catch (IOException e) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
String saveAs = url.toExternalForm();
|
String saveAs = url.toExternalForm();
|
||||||
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
|
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
|
||||||
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
|
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
|
||||||
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
|
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
|
||||||
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
|
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
|
||||||
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
|
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
|
||||||
|
return saveText(url,subdirectory,text,index,saveAs);
|
||||||
|
}
|
||||||
|
public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) {
|
||||||
|
// Not the best for some cases, like FurAffinity. Overridden there.
|
||||||
|
try {
|
||||||
|
stopCheck();
|
||||||
|
} catch (IOException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
File saveFileAs;
|
File saveFileAs;
|
||||||
try {
|
try {
|
||||||
if (!subdirectory.equals("")) { // Not sure about this part
|
if (!subdirectory.equals("")) { // Not sure about this part
|
||||||
@ -153,7 +157,7 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
+ subdirectory
|
+ subdirectory
|
||||||
+ File.separator
|
+ File.separator
|
||||||
+ getPrefix(index)
|
+ getPrefix(index)
|
||||||
+ saveAs
|
+ fileName
|
||||||
+ ".txt");
|
+ ".txt");
|
||||||
// Write the file
|
// Write the file
|
||||||
FileOutputStream out = (new FileOutputStream(saveFileAs));
|
FileOutputStream out = (new FileOutputStream(saveFileAs));
|
||||||
|
@ -119,7 +119,19 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
.cookies(cookies)
|
.cookies(cookies)
|
||||||
.get();
|
.get();
|
||||||
}
|
}
|
||||||
|
public String jsonToImage(Document page,String id) {
|
||||||
|
Elements js = page.select("script[type=\"text/javascript\"]");
|
||||||
|
for (Element tag : js) {
|
||||||
|
if (tag.html().contains("window.__pageload")) {
|
||||||
|
String script = tag.html();
|
||||||
|
script = script.substring(script.indexOf("window.__pageload"));
|
||||||
|
script = script.substring(script.indexOf(id));
|
||||||
|
script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id
|
||||||
|
return script.replace("\\/","/");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromPage(Document page) {
|
public List<String> getURLsFromPage(Document page) {
|
||||||
List<String> imageURLs = new ArrayList<String>();
|
List<String> imageURLs = new ArrayList<String>();
|
||||||
@ -139,18 +151,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
fullSize = thumb.attr("data-super-full-img");
|
fullSize = thumb.attr("data-super-full-img");
|
||||||
} else {
|
} else {
|
||||||
String spanUrl = thumb.attr("href");
|
String spanUrl = thumb.attr("href");
|
||||||
// id = spanUrl.substring(spanUrl.lastIndexOf('-') + 1)
|
fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
|
||||||
Elements js = page.select("script[type=\"text/javascript\"]");
|
|
||||||
for (Element tag : js) {
|
|
||||||
if (tag.html().contains("window.__pageload")) {
|
|
||||||
String script = tag.html();
|
|
||||||
script = script.substring(script.indexOf("window.__pageload"));
|
|
||||||
script = script.substring(script.indexOf(spanUrl.substring(spanUrl.lastIndexOf('-') + 1)));
|
|
||||||
script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id
|
|
||||||
fullSize = script.replace("\\/","/");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (fullSize == null) {
|
if (fullSize == null) {
|
||||||
try {
|
try {
|
||||||
fullSize = thumbToFull(img.attr("src"), true);
|
fullSize = thumbToFull(img.attr("src"), true);
|
||||||
@ -263,17 +264,18 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
* Attempts to download description for image.
|
* Attempts to download description for image.
|
||||||
* Comes in handy when people put entire stories in their description.
|
* Comes in handy when people put entire stories in their description.
|
||||||
* If no description was found, returns null.
|
* If no description was found, returns null.
|
||||||
* @param page The page the description will be retrieved from
|
* @param url The URL the description will be retrieved from
|
||||||
* @return The description
|
* @param page The gallery page the URL was found on
|
||||||
|
* @return A String[] whose first element is the description and whose second element, present only if found, is the image file name.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public String getDescription(String page) {
|
public String[] getDescription(String url,Document page) {
|
||||||
if (isThisATest()) {
|
if (isThisATest()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
// Fetch the image page
|
// Fetch the image page
|
||||||
Response resp = Http.url(page)
|
Response resp = Http.url(url)
|
||||||
.referrer(this.url)
|
.referrer(this.url)
|
||||||
.cookies(cookies)
|
.cookies(cookies)
|
||||||
.response();
|
.response();
|
||||||
@ -289,7 +291,23 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
|
documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
|
||||||
ele.select("br").append("\\n");
|
ele.select("br").append("\\n");
|
||||||
ele.select("p").prepend("\\n\\n");
|
ele.select("p").prepend("\\n\\n");
|
||||||
return Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
|
String fullSize = null;
|
||||||
|
Element thumb = page.select("div.zones-container span.thumb[href=\"" + url + "\"]").get(0);
|
||||||
|
if (!thumb.attr("data-super-full-img").isEmpty()) {
|
||||||
|
fullSize = thumb.attr("data-super-full-img");
|
||||||
|
String[] split = fullSize.split("/");
|
||||||
|
fullSize = split[split.length - 1];
|
||||||
|
} else {
|
||||||
|
String spanUrl = thumb.attr("href");
|
||||||
|
fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1));
|
||||||
|
String[] split = fullSize.split("/");
|
||||||
|
fullSize = split[split.length - 1];
|
||||||
|
}
|
||||||
|
if (fullSize == null) {
|
||||||
|
return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))};
|
||||||
|
}
|
||||||
|
fullSize = fullSize.substring(0,fullSize.lastIndexOf("."));
|
||||||
|
return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize};
|
||||||
// TODO Make this not make a newline if someone just types \n into the description.
|
// TODO Make this not make a newline if someone just types \n into the description.
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
|
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
|
||||||
|
Loading…
Reference in New Issue
Block a user