Merge pull request #144 from Wiiplay123/master
Added deviantART description ripping
This commit is contained in:
commit
27bda1bc9f
@ -1,5 +1,7 @@
|
|||||||
package com.rarchives.ripme.ripper;
|
package com.rarchives.ripme.ripper;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
@ -14,7 +16,7 @@ import com.rarchives.ripme.utils.Utils;
|
|||||||
* Simplified ripper, designed for ripping from sites by parsing HTML.
|
* Simplified ripper, designed for ripping from sites by parsing HTML.
|
||||||
*/
|
*/
|
||||||
public abstract class AbstractHTMLRipper extends AlbumRipper {
|
public abstract class AbstractHTMLRipper extends AlbumRipper {
|
||||||
|
|
||||||
public AbstractHTMLRipper(URL url) throws IOException {
|
public AbstractHTMLRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
}
|
}
|
||||||
@ -27,6 +29,9 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
throw new IOException("getNextPage not implemented");
|
throw new IOException("getNextPage not implemented");
|
||||||
}
|
}
|
||||||
public abstract List<String> getURLsFromPage(Document page);
|
public abstract List<String> getURLsFromPage(Document page);
|
||||||
|
public List<String> getDescriptionsFromPage(Document doc) throws IOException {
|
||||||
|
throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function?
|
||||||
|
}
|
||||||
public abstract void downloadURL(URL url, int index);
|
public abstract void downloadURL(URL url, int index);
|
||||||
public DownloadThreadPool getThreadPool() {
|
public DownloadThreadPool getThreadPool() {
|
||||||
return null;
|
return null;
|
||||||
@ -45,21 +50,27 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
public boolean hasDescriptionSupport() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
public String getDescription(String page) throws IOException {
|
||||||
|
throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function?
|
||||||
|
}
|
||||||
@Override
|
@Override
|
||||||
public void rip() throws IOException {
|
public void rip() throws IOException {
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
int textindex = 0;
|
||||||
logger.info("Retrieving " + this.url);
|
logger.info("Retrieving " + this.url);
|
||||||
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
|
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
|
||||||
Document doc = getFirstPage();
|
Document doc = getFirstPage();
|
||||||
|
|
||||||
while (doc != null) {
|
while (doc != null) {
|
||||||
List<String> imageURLs = getURLsFromPage(doc);
|
List<String> imageURLs = getURLsFromPage(doc);
|
||||||
|
|
||||||
if (imageURLs.size() == 0) {
|
if (imageURLs.size() == 0) {
|
||||||
throw new IOException("No images found at " + doc.location());
|
throw new IOException("No images found at " + doc.location());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (String imageURL : imageURLs) {
|
for (String imageURL : imageURLs) {
|
||||||
if (isStopped()) {
|
if (isStopped()) {
|
||||||
break;
|
break;
|
||||||
@ -67,6 +78,21 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
index += 1;
|
index += 1;
|
||||||
downloadURL(new URL(imageURL), index);
|
downloadURL(new URL(imageURL), index);
|
||||||
}
|
}
|
||||||
|
if (hasDescriptionSupport()) {
|
||||||
|
List<String> textURLs = getDescriptionsFromPage(doc);
|
||||||
|
if (textURLs.size() > 0) {
|
||||||
|
for (String textURL : textURLs) {
|
||||||
|
if (isStopped()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
textindex += 1;
|
||||||
|
String tempDesc = getDescription(textURL);
|
||||||
|
if (tempDesc != null) {
|
||||||
|
saveText(new URL(textURL), "", tempDesc, textindex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (isStopped()) {
|
if (isStopped()) {
|
||||||
break;
|
break;
|
||||||
@ -87,7 +113,46 @@ public abstract class AbstractHTMLRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
waitForThreads();
|
waitForThreads();
|
||||||
}
|
}
|
||||||
|
public boolean saveText(URL url, String subdirectory, String text, int index) {
|
||||||
|
try {
|
||||||
|
stopCheck();
|
||||||
|
} catch (IOException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
String saveAs = url.toExternalForm();
|
||||||
|
saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1);
|
||||||
|
if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); }
|
||||||
|
if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); }
|
||||||
|
if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); }
|
||||||
|
if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); }
|
||||||
|
File saveFileAs;
|
||||||
|
try {
|
||||||
|
if (!subdirectory.equals("")) { // Not sure about this part
|
||||||
|
subdirectory = File.separator + subdirectory;
|
||||||
|
}
|
||||||
|
// TODO Get prefix working again, probably requires reworking a lot of stuff!
|
||||||
|
saveFileAs = new File(
|
||||||
|
workingDir.getCanonicalPath()
|
||||||
|
+ subdirectory
|
||||||
|
+ File.separator
|
||||||
|
+ getPrefix(index)
|
||||||
|
+ saveAs
|
||||||
|
+ ".txt");
|
||||||
|
// Write the file
|
||||||
|
FileOutputStream out = (new FileOutputStream(saveFileAs));
|
||||||
|
out.write(text.getBytes());
|
||||||
|
out.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("[!] Error creating save file path for description '" + url + "':", e);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
logger.debug("Downloading " + url + "'s description to " + saveFileAs);
|
||||||
|
if (!saveFileAs.getParentFile().exists()) {
|
||||||
|
logger.info("[+] Creating directory: " + Utils.removeCWD(saveFileAs.getParent()));
|
||||||
|
saveFileAs.getParentFile().mkdirs();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
public String getPrefix(int index) {
|
public String getPrefix(int index) {
|
||||||
String prefix = "";
|
String prefix = "";
|
||||||
if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
|
if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
|
||||||
|
@ -132,6 +132,7 @@ public abstract class AbstractRipper
|
|||||||
return addURLToDownload(url, saveFileAs, referrer, cookies);
|
return addURLToDownload(url, saveFileAs, referrer, cookies);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Queues file to be downloaded and saved. With options.
|
* Queues file to be downloaded and saved. With options.
|
||||||
* @param url
|
* @param url
|
||||||
|
@ -15,8 +15,10 @@ import java.util.regex.Pattern;
|
|||||||
|
|
||||||
import org.jsoup.Connection.Method;
|
import org.jsoup.Connection.Method;
|
||||||
import org.jsoup.Connection.Response;
|
import org.jsoup.Connection.Response;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.safety.Whitelist;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
@ -43,7 +45,10 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
public String getDomain() {
|
public String getDomain() {
|
||||||
return "deviantart.com";
|
return "deviantart.com";
|
||||||
}
|
}
|
||||||
|
@Override
|
||||||
|
public boolean hasDescriptionSupport() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
@Override
|
@Override
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
String u = url.toExternalForm();
|
String u = url.toExternalForm();
|
||||||
@ -118,7 +123,6 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
logger.info("Attempting to get full size image from " + thumb.attr("href"));
|
logger.info("Attempting to get full size image from " + thumb.attr("href"));
|
||||||
fullSize = smallToFull(img.attr("src"), thumb.attr("href"));
|
fullSize = smallToFull(img.attr("src"), thumb.attr("href"));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fullSize == null) {
|
if (fullSize == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -131,7 +135,23 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
return imageURLs;
|
return imageURLs;
|
||||||
}
|
}
|
||||||
|
@Override
|
||||||
|
public List<String> getDescriptionsFromPage(Document page) {
|
||||||
|
List<String> textURLs = new ArrayList<String>();
|
||||||
|
|
||||||
|
// Iterate over all thumbnails
|
||||||
|
for (Element thumb : page.select("div.zones-container a.thumb")) {
|
||||||
|
if (isStopped()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Element img = thumb.select("img").get(0);
|
||||||
|
if (img.attr("transparent").equals("false")) {
|
||||||
|
continue; // a.thumbs to other albums are invisible
|
||||||
|
}
|
||||||
|
textURLs.add(thumb.attr("href"));
|
||||||
|
}
|
||||||
|
return textURLs;
|
||||||
|
}
|
||||||
@Override
|
@Override
|
||||||
public Document getNextPage(Document page) throws IOException {
|
public Document getNextPage(Document page) throws IOException {
|
||||||
Elements nextButtons = page.select("li.next > a");
|
Elements nextButtons = page.select("li.next > a");
|
||||||
@ -184,7 +204,42 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
return result.toString();
|
return result.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Attempts to download description for image.
|
||||||
|
* Comes in handy when people put entire stories in their description.
|
||||||
|
* If no description was found, returns null.
|
||||||
|
* @param page The page the description will be retrieved from
|
||||||
|
* @return The description
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getDescription(String page) {
|
||||||
|
try {
|
||||||
|
// Fetch the image page
|
||||||
|
Response resp = Http.url(page)
|
||||||
|
.referrer(this.url)
|
||||||
|
.cookies(cookies)
|
||||||
|
.response();
|
||||||
|
cookies.putAll(resp.cookies());
|
||||||
|
|
||||||
|
// Try to find the description
|
||||||
|
Elements els = resp.parse().select("div[class=dev-description]");
|
||||||
|
if (els.size() == 0) {
|
||||||
|
throw new IOException("No description found");
|
||||||
|
}
|
||||||
|
Document documentz = resp.parse();
|
||||||
|
Element ele = documentz.select("div[class=dev-description]").get(0);
|
||||||
|
documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
|
||||||
|
ele.select("br").append("\\n");
|
||||||
|
ele.select("p").prepend("\\n\\n");
|
||||||
|
return Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
|
||||||
|
// TODO Make this not make a newline if someone just types \n into the description.
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If largest resolution for image at 'thumb' is found, starts downloading
|
* If largest resolution for image at 'thumb' is found, starts downloading
|
||||||
* and returns null.
|
* and returns null.
|
||||||
@ -202,7 +257,7 @@ public class DeviantartRipper extends AbstractHTMLRipper {
|
|||||||
.response();
|
.response();
|
||||||
cookies.putAll(resp.cookies());
|
cookies.putAll(resp.cookies());
|
||||||
|
|
||||||
// Try to find the "Download" box
|
// Try to find the download button
|
||||||
Elements els = resp.parse().select("a.dev-page-download");
|
Elements els = resp.parse().select("a.dev-page-download");
|
||||||
if (els.size() == 0) {
|
if (els.size() == 0) {
|
||||||
throw new IOException("No download page found");
|
throw new IOException("No download page found");
|
||||||
|
@ -231,10 +231,12 @@ public class Utils {
|
|||||||
classes.add(Class.forName(className));
|
classes.add(Class.forName(className));
|
||||||
} catch (ClassNotFoundException e) {
|
} catch (ClassNotFoundException e) {
|
||||||
logger.error("ClassNotFoundException loading " + className);
|
logger.error("ClassNotFoundException loading " + className);
|
||||||
|
jarFile.close(); // Resource leak fix?
|
||||||
throw new RuntimeException("ClassNotFoundException loading " + className);
|
throw new RuntimeException("ClassNotFoundException loading " + className);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
jarFile.close(); // Eclipse said not closing it would have a resource leak
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("Error while loading jar file:", e);
|
logger.error("Error while loading jar file:", e);
|
||||||
throw new RuntimeException(pkgname + " (" + directory + ") does not appear to be a valid package", e);
|
throw new RuntimeException(pkgname + " (" + directory + ") does not appear to be a valid package", e);
|
||||||
|
Loading…
Reference in New Issue
Block a user