Merge pull request #573 from kevin51jiang/ImageBamGetDirectURL

Failed tests have nothing to do with this update.
This commit is contained in:
Kevin Jiang 2018-05-12 18:07:13 -04:00 committed by GitHub
commit b05938df94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 27 deletions

View File

@ -1,5 +1,9 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Base64;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
@ -13,7 +17,6 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
@ -22,11 +25,6 @@ import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Base64;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class DeviantartRipper extends AbstractHTMLRipper {
private static final int PAGE_SLEEP_TIME = 3000,
@ -108,19 +106,46 @@ public class DeviantartRipper extends AbstractHTMLRipper {
throw new MalformedURLException("Expected URL format: http://username.deviantart.com/[/gallery/#####], got: " + url);
}
/**
* Gets first page.
* Will determine if login is supplied,
* if there is a login, then login and add that login cookies.
* Otherwise, just bypass the age gate with an anonymous flag.
* @return
* @throws IOException
*/
@Override
public Document getFirstPage() throws IOException {
// Login
try {
cookies = loginToDeviantart();
} catch (Exception e) {
logger.warn("Failed to login: ", e);
//Test to see if there is a login:
String username = Utils.getConfigString("deviantart.username", new String(Base64.decode("Z3JhYnB5")));
String password = Utils.getConfigString("deviantart.password", new String(Base64.decode("ZmFrZXJz")));
if (username == null || password == null) {
logger.debug("No DeviantArt login provided.");
cookies.put("agegate_state","1"); // Bypasses the age gate
} else {
// Attempt Login
try {
cookies = loginToDeviantart();
} catch (IOException e) {
logger.warn("Failed to login: ", e);
cookies.put("agegate_state","1"); // Bypasses the age gate
}
}
return Http.url(this.url)
.cookies(cookies)
.get();
}
/**
*
* @param page
* @param id
* @return
*/
private String jsonToImage(Document page, String id) {
Elements js = page.select("script[type=\"text/javascript\"]");
for (Element tag : js) {

View File

@ -1,5 +1,9 @@
package com.rarchives.ripme.ripper.rippers;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
@ -7,16 +11,10 @@ import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
public class ImagebamRipper extends AbstractHTMLRipper {
// Current HTML document
@ -71,7 +69,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
public Document getNextPage(Document doc) throws IOException {
// Find next page
Elements hrefs = doc.select("a.pagination_current + a.pagination_link");
if (hrefs.size() == 0) {
if (hrefs.isEmpty()) {
throw new IOException("No more pages");
}
String nextUrl = "http://www.imagebam.com" + hrefs.first().attr("href");
@ -121,8 +119,8 @@ public class ImagebamRipper extends AbstractHTMLRipper {
* Handles case when site has IP-banned the user.
*/
private class ImagebamImageThread extends Thread {
private URL url; //link to "image page"
private int index; //index in album
ImagebamImageThread(URL url, int index) {
super();
@ -134,28 +132,43 @@ public class ImagebamRipper extends AbstractHTMLRipper {
public void run() {
    // Thread entry point: all the work happens in fetchImage().
    this.fetchImage();
}
/**
* Rips useful image from "image page"
*/
private void fetchImage() {
try {
Document doc = Http.url(url).get();
// Find image
Elements images = doc.select(".image-container img");
if (images.size() == 0) {
Elements metaTags = doc.getElementsByTag("meta");
String imgsrc = "";//initialize, so no NullPointerExceptions should ever happen.
for (Element metaTag: metaTags) {
//the direct link to the image seems to always be linked in the <meta> part of the html.
if (metaTag.attr("property").equals("og:image")) {
imgsrc = metaTag.attr("content");
logger.info("Found URL " + imgsrc);
break;//only one (useful) image possible for an "image page".
}
}
//for debug, or something goes wrong.
if (imgsrc.isEmpty()) {
logger.warn("Image not found at " + this.url);
return;
}
Element image = images.first();
String imgsrc = image.attr("src");
logger.info("Found URL " + imgsrc);
// Provide prefix and let the AbstractRipper "guess" the filename
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
addURLToDownload(new URL(imgsrc), prefix);
} catch (IOException e) {
logger.error("[!] Exception while loading/parsing " + this.url, e);
}
}
}
}
}