From 1b20e98f8f47aa375f0c700566b4bbaa2dfd1f15 Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Fri, 20 Jun 2014 04:09:36 -0700
Subject: [PATCH] Unifying external page requests to a single method

---
 .../ripme/ripper/AbstractRipper.java | 91 +++++++++++++++++++
 .../rarchives/ripme/ripper/AlbumRipper.java | 47 ----------
 .../ripper/rippers/ButttoucherRipper.java | 4 +-
 .../ripme/ripper/rippers/ChanRipper.java | 7 +-
 .../ripper/rippers/DeviantartRipper.java | 17 +---
 .../ripme/ripper/rippers/DrawcrowdRipper.java | 7 +-
 .../ripme/ripper/rippers/EHentaiRipper.java | 34 +++----
 .../ripper/rippers/EightmusesRipper.java | 16 +---
 .../ripme/ripper/rippers/FapprovedRipper.java | 15 +--
 .../ripme/ripper/rippers/FlickrRipper.java | 21 ++---
 .../ripme/ripper/rippers/GifyoRipper.java | 6 +-
 .../ripper/rippers/GirlsOfDesireRipper.java | 13 +--
 .../ripme/ripper/rippers/GonewildRipper.java | 5 +-
 .../ripper/rippers/HentaifoundryRipper.java | 20 +---
 .../ripme/ripper/rippers/ImagearnRipper.java | 12 ++-
 .../ripme/ripper/rippers/ImagebamRipper.java | 22 +----
 .../ripme/ripper/rippers/ImagefapRipper.java | 6 +-
 .../ripper/rippers/ImagestashRipper.java | 15 ++-
 .../ripper/rippers/ImagevenueRipper.java | 14 +--
 .../ripme/ripper/rippers/ImgboxRipper.java | 9 +-
 .../ripme/ripper/rippers/ImgurRipper.java | 21 ++---
 .../ripme/ripper/rippers/InstagramRipper.java | 14 +--
 .../ripper/rippers/IrarchivesRipper.java | 14 ++-
 .../ripper/rippers/MediacrushRipper.java | 10 +-
 .../ripme/ripper/rippers/MinusRipper.java | 15 +--
 .../ripper/rippers/MotherlessRipper.java | 27 +++---
 .../ripme/ripper/rippers/NfsfwRipper.java | 22 ++---
 .../ripper/rippers/PhotobucketRipper.java | 10 +-
 .../ripme/ripper/rippers/PornhubRipper.java | 23 +----
 .../ripme/ripper/rippers/RedditRipper.java | 6 +-
 .../ripme/ripper/rippers/SeeniveRipper.java | 25 +++--
 .../ripme/ripper/rippers/SmuttyRipper.java | 12 ++-
 .../ripper/rippers/SupertangasRipper.java | 6 +-
 .../ripper/rippers/TeenplanetRipper.java | 9 +-
 .../ripme/ripper/rippers/TumblrRipper.java | 19 ++--
 .../ripme/ripper/rippers/VidbleRipper.java | 5 +-
 .../ripme/ripper/rippers/VineRipper.java | 6 +-
 .../ripme/ripper/rippers/VineboxRipper.java | 7 +-
 .../ripme/ripper/rippers/XhamsterRipper.java | 3 +-
 .../ripper/rippers/video/BeegRipper.java | 7 +-
 .../ripper/rippers/video/GfycatRipper.java | 5 +-
 .../ripper/rippers/video/PornhubRipper.java | 4 +-
 .../ripper/rippers/video/ViddmeRipper.java | 5 +-
 .../ripper/rippers/video/VineRipper.java | 5 +-
 .../ripper/rippers/video/XvideosRipper.java | 7 +-
 .../ripper/rippers/video/YoupornRipper.java | 6 +-
 src/main/resources/rip.properties | 3 +
 47 files changed, 280 insertions(+), 397 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index 95705e38..1a1016a3 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -12,7 +12,12 @@ import java.util.Observable;
 
 import org.apache.log4j.FileAppender;
 import org.apache.log4j.Logger;
+import org.jsoup.Connection;
+import org.jsoup.Connection.Method;
+import org.jsoup.Connection.Response;
 import org.jsoup.HttpStatusException;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
 
 import com.rarchives.ripme.ui.RipStatusHandler;
 import com.rarchives.ripme.ui.RipStatusMessage;
@@ -28,6 +33,8 @@ public abstract class AbstractRipper
 
     public static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:29.0) Gecko/20100101 Firefox/29.0";
 
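+    // Timeout for external page requests, in milliseconds ("page.timeout" config key, default 5 seconds)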
+    public static final int TIMEOUT = Utils.getConfigInteger("page.timeout", 5 * 1000);
+
     protected URL url;
     protected File workingDir;
     protected DownloadThreadPool threadPool;
@@ -54,6 +61,10 @@
         }
     }
 
+    protected int getTimeout() {
+        return TIMEOUT;
+    }
+
     /**
      * Ensures inheriting ripper can rip this URL, raises exception if not.
      * Otherwise initializes working directory and thread pool.
@@ -335,4 +346,84 @@ public void setBytesCompleted(int bytes) {
         // Do nothing
     }
+
+    // Thar be overloaded methods afoot
+    public Document getDocument(URL url) throws IOException {
+        return getDocument(url.toExternalForm());
+    }
+    public Document getDocument(String url) throws IOException {
+        return getResponse(url).parse();
+    }
+    public Document getDocument(String url, boolean ignoreContentType) throws IOException {
+        return getResponse(url, ignoreContentType).parse();
+    }
+    public Document getDocument(String url, Map<String,String> cookies) throws IOException {
+        return getResponse(url, cookies).parse();
+    }
+    public Document getDocument(String url, String referrer, Map<String,String> cookies) throws IOException {
+        return getResponse(url, Method.GET, referrer, cookies).parse();
+    }
+    public Response getResponse(String url) throws IOException {
+        return getResponse(url, Method.GET, USER_AGENT, null, null, false);
+    }
+    public Response getResponse(URL url) throws IOException {
+        return getResponse(url.toExternalForm());
+    }
+    public Response getResponse(String url, String referrer) throws IOException {
+        return getResponse(url, Method.GET, USER_AGENT, referrer, null, false);
+    }
+    public Response getResponse(URL url, boolean ignoreContentType) throws IOException {
+        return getResponse(url.toExternalForm(), Method.GET, USER_AGENT, null, null, ignoreContentType);
+    }
+    public Response getResponse(String url, Map<String,String> cookies) throws IOException {
+        // Call the full variant directly so USER_AGENT is not bound to the referrer parameter
+        return getResponse(url, Method.GET, USER_AGENT, null, cookies, false);
+    }
+    public Response getResponse(String url, String referrer, Map<String,String> cookies) throws IOException {
+        return getResponse(url, Method.GET, referrer, cookies);
+    }
+    public Response getResponse(String url, Method method) throws IOException {
+        return getResponse(url, method, USER_AGENT, null, null, false);
+    }
+    public Response getResponse(String url, Method method, String referrer, Map<String,String> cookies) throws IOException {
+        return getResponse(url, method, USER_AGENT, referrer, cookies, false);
+    }
+    public Response getResponse(String url, boolean ignoreContentType) throws IOException {
+        return getResponse(url, Method.GET, USER_AGENT, null, null, ignoreContentType);
+    }
+    public Response getResponse(String url, Method method, boolean ignoreContentType) throws IOException {
+        return getResponse(url, method, USER_AGENT, null, null, ignoreContentType);
+    }
+
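+    /**
+     * Single entry point for all external page requests made by rippers.
+     * A null method or userAgent falls back to Method.GET / USER_AGENT, the
+     * timeout comes from getTimeout(), and failed requests are retried up to
+     * the "download.retries" config value; null is returned if every attempt fails.
+     */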
+    public Response getResponse(String url,
+                                Method method,
+                                String userAgent,
+                                String referrer,
+                                Map<String,String> cookies,
+                                boolean ignoreContentType)
+                                throws IOException {
+        Connection connection = Jsoup.connect(url);
+
+        connection.method( (method == null) ? Method.GET : method);
+        connection.userAgent( (userAgent == null) ? USER_AGENT : userAgent);
+        connection.ignoreContentType(ignoreContentType);
+        connection.timeout(getTimeout());
+        connection.maxBodySize(0);
+
+        if (cookies != null) { connection.cookies(cookies); }
+        if (referrer != null) { connection.referrer(referrer); }
+
+        Response response = null;
+        int retries = Utils.getConfigInteger("download.retries", 1);
+        while (retries >= 0) {
+            retries--;
+            try {
+                response = connection.execute();
+                break; // Request succeeded; stop retrying
+            } catch (IOException e) {
+                logger.warn("Error while loading " + url, e);
+                continue;
+            }
+        }
+        return response;
+    }
+
 }
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java b/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
index 20378c00..43d6bbb5 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AlbumRipper.java
@@ -8,11 +8,6 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.jsoup.Connection;
-import org.jsoup.Connection.Method;
-import org.jsoup.Connection.Response;
-import org.jsoup.Jsoup;
-
 import com.rarchives.ripme.ui.RipStatusMessage;
 import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Utils;
@@ -175,47 +170,5 @@
           .append(", Errored: " ).append(itemsErrored.size());
         return sb.toString();
     }
-
-    public Response getResponse(String url,
-            Method method,
-            String userAgent,
-            String referrer,
-            Map<String,String> cookies,
-            boolean ignoreContentType)
-            throws IOException {
-        Connection connection = Jsoup.connect(url);
-        if (method == null) {
-            method = Method.GET;
-        }
-        connection.method(method);
-
-        if (userAgent == null) {
-            userAgent = USER_AGENT;
-        }
-        connection.userAgent(userAgent);
-
-        if (cookies != null) {
-            connection.cookies(cookies);
-        }
-
-        if (referrer != null) {
-            connection.referrer(referrer);
-        }
-        connection.ignoreContentType(ignoreContentType);
-
-        connection.maxBodySize(0);
-        Response response = null;
-        int retries = Utils.getConfigInteger("download.retries", 1);;
-        while (retries >= 0) {
-            retries--;
-            try {
-                response = connection.execute();
-            } catch (IOException e) {
-                logger.warn("Error while loading " + url, e);
-                continue;
-            }
-        }
-        return response;
-    }
 }
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java
index 7ab6ae48..8a2adb83 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java
@@ -46,9 +46,9 @@ public class ButttoucherRipper extends AlbumRipper {
 
     @Override
     public void rip() throws IOException {
-        logger.info(" Retrieving " + this.url.toExternalForm());
+        logger.info("Retrieving " + this.url);
         if (albumDoc == null) {
-            albumDoc = Jsoup.connect(this.url.toExternalForm()).get();
+            albumDoc = getDocument(this.url);
         }
         int index = 0;
         for (Element thumb : albumDoc.select("div.image-gallery > a > img")) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
index 206024af..d63fce03 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
@@ -8,7 +8,6 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import
org.jsoup.nodes.Element; @@ -79,10 +78,8 @@ public class ChanRipper extends AlbumRipper { Set attempted = new HashSet(); int index = 0; Pattern p; Matcher m; - logger.info(" Retrieving " + this.url.toExternalForm()); - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + logger.info("Retrieving " + this.url); + Document doc = getDocument(this.url); for (Element link : doc.select("a")) { if (!link.hasAttr("href")) { continue; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 1a7d7d8e..d0e1407f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -65,10 +65,7 @@ public class DeviantartRipper extends AlbumRipper { logger.info(" Retrieving " + nextURL); sendUpdate(STATUS.LOADING_RESOURCE, "Retrieving " + nextURL); - Document doc = Jsoup.connect(nextURL) - .cookies(cookies) - .userAgent(USER_AGENT) - .get(); + Document doc = getDocument(nextURL, cookies); // Iterate over all thumbnails for (Element thumb : doc.select("div.zones-container a.thumb")) { @@ -190,12 +187,7 @@ public class DeviantartRipper extends AlbumRipper { public String smallToFull(String thumb, String page) { try { // Fetch the image page - Response resp = Jsoup.connect(page) - .userAgent(USER_AGENT) - .cookies(cookies) - .referrer(this.url.toExternalForm()) - .method(Method.GET) - .execute(); + Response resp = getResponse(page, Method.GET, USER_AGENT, this.url.toExternalForm(), cookies, false); Map cookies = resp.cookies(); cookies.putAll(this.cookies); @@ -262,10 +254,7 @@ public class DeviantartRipper extends AlbumRipper { if (username == null || password == null) { throw new IOException("could not find username or password in config"); } - Response resp = Jsoup.connect("http://www.deviantart.com/") - .userAgent(USER_AGENT) - .method(Method.GET) - .execute(); + Response resp = getResponse("http://www.deviantart.com/"); for (Element input : resp.parse().select("form#form-login input[type=hidden]")) { postData.put(input.attr("name"), input.attr("value")); } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java index fd48e5db..69323bbc 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java @@ -6,7 +6,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -62,8 +61,8 @@ public class DrawcrowdRipper extends AlbumRipper { public void rip() throws IOException { int index = 0; sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - logger.info(" Retrieving " + this.url.toExternalForm()); - Document albumDoc = Jsoup.connect(this.url.toExternalForm()).get(); + logger.info("Retrieving " + this.url); + Document albumDoc = getDocument(this.url); while (true) { if (isStopped()) { break; @@ -92,7 +91,7 @@ public class DrawcrowdRipper extends AlbumRipper { throw new IOException(e); } sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - albumDoc = Jsoup.connect(nextURL).get(); + albumDoc = getDocument(nextURL); } waitForThreads(); } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java 
b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java index 2d46cc0f..75054ae1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java @@ -4,10 +4,11 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.HashMap; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -22,7 +23,6 @@ public class EHentaiRipper extends AlbumRipper { private static final int PAGE_SLEEP_TIME = 3 * 1000; private static final int IMAGE_SLEEP_TIME = 1 * 1000; private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000; - private static final int TIMEOUT = 5 * 1000; private static final String DOMAIN = "g.e-hentai.org", HOST = "e-hentai"; @@ -31,6 +31,12 @@ public class EHentaiRipper extends AlbumRipper { // Current HTML document private Document albumDoc = null; + + private static final Map cookies = new HashMap(); + static { + cookies.put("nw", "1"); + cookies.put("tip", "1"); + } public EHentaiRipper(URL url) throws IOException { super(url); @@ -49,14 +55,9 @@ public class EHentaiRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - logger.info(" Retrieving " + url.toExternalForm()); sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - albumDoc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .cookie("nw", "1") - .cookie("tip", "1") - .timeout(TIMEOUT) - .get(); + logger.info("Retrieving " + url); + albumDoc = getDocument(url.toExternalForm(), cookies); } Elements elems = albumDoc.select("#gn"); return HOST + "_" + elems.get(0).text(); @@ -95,12 +96,7 @@ public class EHentaiRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = Jsoup.connect(nextUrl) - .userAgent(USER_AGENT) - .cookie("nw", "1") - .timeout(TIMEOUT) - .referrer(this.url.toExternalForm()) - .get(); + albumDoc = getDocument(nextUrl, this.url.toExternalForm(), cookies); } // Check for rate limiting if (albumDoc.toString().contains("IP address will be automatically banned")) { @@ -201,12 +197,8 @@ public class EHentaiRipper extends AlbumRipper { private void fetchImage() { try { - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .cookie("nw", "1") - .timeout(TIMEOUT) - .referrer(this.url.toExternalForm()) - .get(); + String u = this.url.toExternalForm(); + Document doc = getDocument(u, u, cookies); // Check for rate limit if (doc.toString().contains("IP address will be automatically banned")) { if (this.retries == 0) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 9970047d..9597994b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -7,9 +7,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Connection.Method; -import org.jsoup.Connection.Response; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -43,12 +40,7 @@ public class EightmusesRipper extends AlbumRipper { try { // Attempt to use 
album title as GID if (albumDoc == null) { - albumDoc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .method(Method.GET) - .timeout(Utils.getConfigInteger("download.timeout", 5000)) - .execute() - .parse(); + albumDoc = getDocument(url); } Element titleElement = albumDoc.select("meta[name=description]").first(); String title = titleElement.attr("content"); @@ -71,11 +63,7 @@ public class EightmusesRipper extends AlbumRipper { logger.info(" Retrieving " + url); sendUpdate(STATUS.LOADING_RESOURCE, url); if (albumDoc == null) { - Response resp = Jsoup.connect(url) - .userAgent(USER_AGENT) - .timeout(Utils.getConfigInteger("download.timeout", 5000)) - .execute(); - albumDoc = resp.parse(); + albumDoc = getDocument(url); } int index = 0; // Both album index and image index diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java index 80cd96e4..6f9251d4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java @@ -6,12 +6,12 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Utils; public class FapprovedRipper extends AlbumRipper { @@ -38,7 +38,7 @@ public class FapprovedRipper extends AlbumRipper { } @Override public void rip() throws IOException { - int page = 0; + int index = 0, page = 0; String url, user = getGID(this.url); boolean hasNextPage = true; while (hasNextPage) { @@ -46,15 +46,18 @@ public class FapprovedRipper extends AlbumRipper { url = "http://fapproved.com/users/" + user + "/images?page=" + page; this.sendUpdate(STATUS.LOADING_RESOURCE, url); logger.info(" Retrieving " + url); - Document doc = Jsoup.connect(url) - .ignoreContentType(true) - .get(); + Document doc = getDocument(url, true); for (Element image : doc.select("div.actual-image img")) { String imageUrl = image.attr("src"); if (imageUrl.startsWith("//")) { imageUrl = "http:" + imageUrl; } - addURLToDownload(new URL(imageUrl)); + index++; + String prefix = ""; + if (Utils.getConfigBoolean("download.save_order", true)) { + prefix = String.format("%03d_", index); + } + addURLToDownload(new URL(imageUrl), prefix); } if ( (doc.select("div.pagination li.next.disabled").size() != 0) || (doc.select("div.pagination").size() == 0) ) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java index 6c1d9284..e91879f1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java @@ -61,7 +61,7 @@ public class FlickrRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = Jsoup.connect(url.toExternalForm()).get(); + albumDoc = getDocument(url); } String user = url.toExternalForm(); user = user.substring(user.indexOf("/photos/") + "/photos/".length()); @@ -124,8 +124,7 @@ public class FlickrRipper extends AlbumRipper { } logger.info(" Retrieving " + nextURL); if (albumDoc == null) { - albumDoc = Jsoup.connect(nextURL) - .get(); + albumDoc = getDocument(nextURL); } for (Element thumb : albumDoc.select("a[data-track=photo-click]")) { 
String imageTitle = null; @@ -212,10 +211,10 @@ public class FlickrRipper extends AlbumRipper { postData.put("passwd", new String(Base64.decode("MUZha2V5ZmFrZQ=="))); String action = doc.select("form[method=post]").get(0).attr("action"); resp = Jsoup.connect(action) - .cookies(resp.cookies()) - .data(postData) - .method(Method.POST) - .execute(); + .cookies(resp.cookies()) + .data(postData) + .method(Method.POST) + .execute(); return resp.cookies(); } @@ -260,9 +259,7 @@ public class FlickrRipper extends AlbumRipper { private Document getLargestImagePageDocument(URL url) throws IOException { // Get current page - Document doc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + Document doc = getDocument(url); // Look for larger image page String largestImagePage = this.url.toExternalForm(); for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) { @@ -280,9 +277,7 @@ public class FlickrRipper extends AlbumRipper { } if (!largestImagePage.equals(this.url.toExternalForm())) { // Found larger image page, get it. - doc = Jsoup.connect(largestImagePage) - .userAgent(USER_AGENT) - .get(); + doc = getDocument(largestImagePage); } return doc; } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java index 3cbdd105..abc51fc7 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java @@ -50,11 +50,7 @@ public class GifyoRipper extends AlbumRipper { logger.info(" Retrieving " + this.url + "(page #" + page + ")"); Response resp = null; if (page == 0) { - resp = Jsoup.connect(this.url.toExternalForm()) - .ignoreContentType(true) - .userAgent(USER_AGENT) - .method(Method.GET) - .execute(); + resp = getResponse(this.url, true); cookies = resp.cookies(); } else { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java index 37457754..eee06858 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java @@ -6,7 +6,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -17,7 +16,6 @@ import com.rarchives.ripme.ui.RipStatusMessage.STATUS; public class GirlsOfDesireRipper extends AlbumRipper { // All sleep times are in milliseconds private static final int IMAGE_SLEEP_TIME = 100; - private static final int TIMEOUT = 5 * 1000; private static final String DOMAIN = "girlsofdesire.org", HOST = "GirlsOfDesire"; @@ -43,10 +41,7 @@ public class GirlsOfDesireRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving " + url.toExternalForm()); sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - albumDoc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .timeout(TIMEOUT) - .get(); + albumDoc = getDocument(url); } Elements elems = albumDoc.select(".albumName"); return HOST + "_" + elems.first().text(); @@ -81,11 +76,7 @@ public class GirlsOfDesireRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = Jsoup.connect(nextUrl) - .userAgent(USER_AGENT) - .timeout(TIMEOUT) - 
.referrer(this.url.toExternalForm()) - .get(); + albumDoc = getDocument(nextUrl); } // Find thumbnails diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java index cab298db..faba994f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java @@ -57,10 +57,7 @@ public class GonewildRipper extends AlbumRipper { gwURL = baseGwURL + "&start=" + start; start += count; - jsonString = Jsoup.connect(gwURL) - .ignoreContentType(true) - .execute() - .body(); + jsonString = getResponse(gwURL, true).body(); json = new JSONObject(jsonString); if (json.has("error")) { logger.error("Error while retrieving user posts:" + json.getString("error")); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java index 141f59e0..9785736a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java @@ -7,9 +7,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -42,14 +40,11 @@ public class HentaifoundryRipper extends AlbumRipper { int index = 0; // Get cookies - Response resp = Jsoup.connect("http://www.hentai-foundry.com/") - .execute(); + Response resp = getResponse("http://www.hentai-foundry.com/"); Map cookies = resp.cookies(); - resp = Jsoup.connect("http://www.hentai-foundry.com/?enterAgree=1&size=1500") - .referrer("http://www.hentai-foundry.com/") - .cookies(cookies) - .method(Method.GET) - .execute(); + resp = getResponse("http://www.hentai-foundry.com/?enterAgree=1&size=1500", + "http://www.hentai-foundry.com/", + cookies); cookies = resp.cookies(); logger.info("cookies: " + cookies); @@ -59,12 +54,7 @@ public class HentaifoundryRipper extends AlbumRipper { break; } sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - Document doc = Jsoup.connect(nextURL) - .userAgent(USER_AGENT) - .timeout(5000) - .cookies(cookies) - .referrer(this.url.toExternalForm()) - .get(); + Document doc = getDocument(nextURL, this.url.toExternalForm(), cookies); for (Element thumb : doc.select("td > a:first-child")) { if (isStopped()) { break; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java index 5d2a3893..ea94fb22 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java @@ -6,11 +6,11 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Utils; public class ImagearnRipper extends AlbumRipper { @@ -41,7 +41,7 @@ public class ImagearnRipper extends AlbumRipper { } private URL getGalleryFromImage(URL url) throws IOException { - Document doc = Jsoup.connect(url.toExternalForm()).get(); + Document doc = getDocument(url); for (Element link : 
doc.select("a[href~=^gallery\\.php.*$]")) { logger.info("LINK: " + link.toString()); if (link.hasAttr("href") @@ -57,9 +57,13 @@ public class ImagearnRipper extends AlbumRipper { @Override public void rip() throws IOException { int index = 0; - logger.info("[ ] Retrieving " + this.url.toExternalForm()); - Document doc = Jsoup.connect(url.toExternalForm()).get(); + logger.info("Retrieving " + this.url.toExternalForm()); + sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); + Document doc = getDocument(this.url); for (Element thumb : doc.select("img.border")) { + if (isStopped()) { + break; + } String image = thumb.attr("src"); image = image.replaceAll("thumbs[0-9]*\\.imagearn\\.com/", "img.imagearn.com/imags/"); index += 1; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java index c43d9d3f..4cc0c58b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java @@ -6,7 +6,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -48,10 +47,7 @@ public class ImagebamRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving " + url.toExternalForm()); sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - albumDoc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .timeout(5000) - .get(); + albumDoc = getDocument(url); } Elements elems = albumDoc.select("legend"); String title = elems.first().text(); @@ -98,11 +94,7 @@ public class ImagebamRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = Jsoup.connect(nextUrl) - .userAgent(USER_AGENT) - .timeout(5000) - .referrer(this.url.toExternalForm()) - .get(); + albumDoc = getDocument(nextUrl, this.url.toExternalForm(), null); } // Find thumbnails Elements thumbs = albumDoc.select("div > a[target=_blank]:not(.footera)"); @@ -149,6 +141,7 @@ public class ImagebamRipper extends AlbumRipper { } } + imagebamThreadPool.waitForThreads(); waitForThreads(); } @@ -178,12 +171,7 @@ public class ImagebamRipper extends AlbumRipper { private void fetchImage() { try { - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .cookie("nw", "1") - .timeout(5000) - .referrer(this.url.toExternalForm()) - .get(); + Document doc = getDocument(url); // Find image Elements images = doc.select("td > img"); if (images.size() == 0) { @@ -192,7 +180,7 @@ public class ImagebamRipper extends AlbumRipper { } Element image = images.first(); String imgsrc = image.attr("src"); - logger.info("Found URL " + imgsrc + " via " + images.get(0)); + logger.info("Found URL " + imgsrc); // Provide prefix and let the AbstractRipper "guess" the filename String prefix = ""; if (Utils.getConfigBoolean("download.save_order", true)) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java index aa41aeb0..44db9f5d 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java @@ -45,7 +45,7 @@ public class ImagefapRipper extends AlbumRipper { try { // Attempt to use album title as GID 
if (albumDoc == null) { - albumDoc = Jsoup.connect(url.toExternalForm()).get(); + albumDoc = getDocument(url); } String title = albumDoc.title(); Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$"); @@ -92,9 +92,9 @@ public class ImagefapRipper extends AlbumRipper { public void rip() throws IOException { int index = 0; sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - logger.info(" Retrieving " + this.url.toExternalForm()); + logger.info("Retrieving " + this.url); if (albumDoc == null) { - albumDoc = Jsoup.connect(this.url.toExternalForm()).get(); + albumDoc = getDocument(this.url); } while (true) { if (isStopped()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java index c9c7fbfc..655b1421 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java @@ -8,8 +8,6 @@ import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; -import org.jsoup.Connection.Method; -import org.jsoup.Jsoup; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; @@ -40,20 +38,21 @@ public class ImagestashRipper extends AlbumRipper { String baseURL = "https://imagestash.org/images?tags=" + getGID(this.url); int page = 0, index = 0; while (true) { + if (isStopped()) { + break; + } page++; String nextURL = baseURL + "&page=" + page; logger.info("[ ] Retrieving " + nextURL); sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - String jsonText = Jsoup.connect(nextURL) - .ignoreContentType(true) - .userAgent(USER_AGENT) - .method(Method.GET) - .execute() - .body(); + String jsonText = getResponse(nextURL, true).body(); logger.info(jsonText); JSONObject json = new JSONObject(jsonText); JSONArray images = json.getJSONArray("images"); for (int i = 0; i < images.length(); i++) { + if (isStopped()) { + break; + } JSONObject image = images.getJSONObject(i); String imageURL = image.getString("src"); if (imageURL.startsWith("/")) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java index 1d099ec8..adb90bc1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java @@ -6,7 +6,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -61,11 +60,7 @@ public class ImagevenueRipper extends AlbumRipper { String nextUrl = this.url.toExternalForm(); logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - Document albumDoc = Jsoup.connect(nextUrl) - .userAgent(USER_AGENT) - .timeout(5000) - .referrer(this.url.toExternalForm()) - .get(); + Document albumDoc = getDocument(nextUrl); // Find thumbnails Elements thumbs = albumDoc.select("a[target=_blank]"); if (thumbs.size() == 0) { @@ -119,11 +114,8 @@ public class ImagevenueRipper extends AlbumRipper { private void fetchImage() { try { - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .timeout(5000) - .referrer(this.url.toExternalForm()) - .get(); + sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); + Document doc = getDocument(this.url); 
// Find image Elements images = doc.select("a > img"); if (images.size() == 0) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java index d8b584bf..0c1438f1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java @@ -6,7 +6,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -36,11 +35,8 @@ public class ImgboxRipper extends AlbumRipper { @Override public void rip() throws IOException { - logger.info(" Retrieving " + this.url); sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + Document doc = getDocument(this.url); Elements images = doc.select("div.boxed-content > a > img"); if (images.size() == 0) { logger.error("No images found at " + this.url); @@ -48,6 +44,9 @@ public class ImgboxRipper extends AlbumRipper { } int index = 0; for (Element image : images) { + if (isStopped()) { + break; + } index++; String imageUrl = image.attr("src").replace("s.imgbox.com", "i.imgbox.com"); String prefix = ""; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java index 1064f493..36464ace 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java @@ -83,11 +83,7 @@ public class ImgurRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .timeout(10 * 1000) - .maxBodySize(0) - .get(); + albumDoc = getDocument(url); } String title = albumDoc.title(); if (!title.contains(" - Imgur") @@ -261,8 +257,9 @@ public class ImgurRipper extends AlbumRipper { * @throws IOException */ private void ripUserAccount(URL url) throws IOException { - logger.info("[ ] Retrieving " + url.toExternalForm()); - Document doc = Jsoup.connect(url.toExternalForm()).get(); + logger.info("Retrieving " + url); + sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); + Document doc = getDocument(url); for (Element album : doc.select("div.cover a")) { stopCheck(); if (!album.hasAttr("href") @@ -292,10 +289,7 @@ public class ImgurRipper extends AlbumRipper { try { page++; String jsonUrlWithParams = jsonUrl + "?sort=0&order=1&album=0&page=" + page + "&perPage=60"; - String jsonString = Jsoup.connect(jsonUrlWithParams) - .ignoreContentType(true) - .execute() - .body(); + String jsonString = getResponse(jsonUrlWithParams, true).body(); JSONObject json = new JSONObject(jsonString); JSONObject jsonData = json.getJSONObject("data"); if (jsonData.has("count")) { @@ -333,10 +327,7 @@ public class ImgurRipper extends AlbumRipper { } pageURL += "page/" + page + "/miss?scrolled"; logger.info(" Retrieving " + pageURL); - Document doc = Jsoup.connect(pageURL) - .userAgent(USER_AGENT) - .timeout(10 * 1000) - .get(); + Document doc = getDocument(pageURL); Elements imgs = doc.select(".post img"); for (Element img : imgs) { String image = img.attr("src"); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java 
b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index ee65ee1a..5c784bc5 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -8,7 +8,6 @@ import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -63,7 +62,7 @@ public class InstagramRipper extends AlbumRipper { } private URL getUserPageFromImage(URL url) throws IOException { - Document doc = Jsoup.connect(url.toExternalForm()).get(); + Document doc = getDocument(url); for (Element element : doc.select("meta[property='og:description']")) { String content = element.attr("content"); if (content.endsWith("'s photo on Instagram")) { @@ -74,9 +73,9 @@ public class InstagramRipper extends AlbumRipper { } private String getUserID(URL url) throws IOException { - logger.info(" Retrieving " + url); + logger.info("Retrieving " + url); this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); - Document doc = Jsoup.connect(this.url.toExternalForm()).get(); + Document doc = getDocument(url); for (Element element : doc.select("input[id=user_public]")) { return element.attr("value"); } @@ -92,12 +91,7 @@ public class InstagramRipper extends AlbumRipper { String url = baseURL + params; this.sendUpdate(STATUS.LOADING_RESOURCE, url); logger.info(" Retrieving " + url); - String jsonString = Jsoup.connect(url) - .userAgent(USER_AGENT) - .timeout(10000) - .ignoreContentType(true) - .execute() - .body(); + String jsonString = getResponse(url, true).body(); JSONObject json = new JSONObject(jsonString); JSONArray datas = json.getJSONArray("data"); String nextMaxID = ""; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java index d84ae193..21aeb063 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java @@ -6,7 +6,6 @@ import java.net.URL; import org.json.JSONArray; import org.json.JSONObject; -import org.jsoup.Jsoup; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; @@ -16,8 +15,6 @@ import com.rarchives.ripme.utils.Utils; public class IrarchivesRipper extends AlbumRipper { - private static final int TIMEOUT = 60000; // Long timeout for this poorly-optimized site. 
- private static final String DOMAIN = "i.rarchives.com", HOST = "irarchives"; @@ -25,6 +22,11 @@ public class IrarchivesRipper extends AlbumRipper { super(url); } + @Override + public int getTimeout() { + return 60 * 1000; + } + @Override public boolean canRip(URL url) { return url.getHost().endsWith(DOMAIN); @@ -46,11 +48,7 @@ public class IrarchivesRipper extends AlbumRipper { @Override public void rip() throws IOException { logger.info(" Retrieving " + this.url); - String jsonString = Jsoup.connect(this.url.toExternalForm()) - .ignoreContentType(true) - .timeout(TIMEOUT) - .execute() - .body(); + String jsonString = getResponse(url, true).body(); JSONObject json = new JSONObject(jsonString); JSONArray posts = json.getJSONArray("posts"); if (posts.length() == 0) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java index 596197ec..1de6b612 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java @@ -15,7 +15,6 @@ import javax.swing.JOptionPane; import org.json.JSONArray; import org.json.JSONObject; -import org.jsoup.Jsoup; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; @@ -23,8 +22,6 @@ import com.rarchives.ripme.utils.Utils; public class MediacrushRipper extends AlbumRipper { - private static final int TIMEOUT = 60000; // Long timeout for this poorly-optimized site. - private static final String DOMAIN = "mediacru.sh", HOST = "mediacrush"; @@ -72,12 +69,7 @@ public class MediacrushRipper extends AlbumRipper { sendUpdate(STATUS.LOADING_RESOURCE, url); String jsonString = null; try { - jsonString = Jsoup.connect(url) - .ignoreContentType(true) - .userAgent(USER_AGENT) - .timeout(TIMEOUT) - .execute() - .body(); + jsonString = getResponse(url, true).body(); } catch (Exception re) { // Check for >1024 bit encryption but in older versions of Java if (re.getCause().getCause() instanceof InvalidAlgorithmParameterException) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java index 21fbfc19..282655bc 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java @@ -9,7 +9,6 @@ import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Connection.Response; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; @@ -48,9 +47,7 @@ public class MinusRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + albumDoc = getDocument(url); } Elements titles = albumDoc.select("meta[property=og:title]"); if (titles.size() > 0) { @@ -132,11 +129,7 @@ public class MinusRipper extends AlbumRipper { + user + "/shares.json/" + page; logger.info(" Retrieving " + jsonUrl); - Response resp = Jsoup.connect(jsonUrl) - .userAgent(USER_AGENT) - .ignoreContentType(true) - .execute(); - System.err.println(resp.body()); + Response resp = getResponse(jsonUrl, true); JSONObject json = new JSONObject(resp.body()); JSONArray galleries = json.getJSONArray("galleries"); for (int i = 0; i < galleries.length(); i++) { @@ -158,9 +151,7 @@ public class MinusRipper extends AlbumRipper { 
private void ripAlbum(URL url, String subdir) throws IOException { logger.info(" Retrieving " + url.toExternalForm()); if (albumDoc == null || !subdir.equals("")) { - albumDoc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + albumDoc = getDocument(url); } Pattern p = Pattern.compile("^.*var gallerydata = (\\{.*\\});.*$", Pattern.DOTALL); Matcher m = p.matcher(albumDoc.data()); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java index e9364cb3..2f81c526 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java @@ -6,12 +6,12 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; +import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Utils; public class MotherlessRipper extends AlbumRipper { @@ -61,13 +61,16 @@ public class MotherlessRipper extends AlbumRipper { int index = 0, page = 1; String nextURL = this.url.toExternalForm(); while (nextURL != null) { - logger.info(" Retrieving " + nextURL); - Document doc = Jsoup.connect(nextURL) - .userAgent(USER_AGENT) - .timeout(5000) - .referrer("http://motherless.com") - .get(); + if (isStopped()) { + break; + } + logger.info("Retrieving " + nextURL); + sendUpdate(STATUS.LOADING_RESOURCE, nextURL); + Document doc = getDocument(nextURL, "http://motherless.com", null); for (Element thumb : doc.select("div.thumb a.img-container")) { + if (isStopped()) { + break; + } String thumbURL = thumb.attr("href"); if (thumbURL.contains("pornmd.com")) { continue; @@ -111,11 +114,11 @@ public class MotherlessRipper extends AlbumRipper { @Override public void run() { try { - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .timeout(5000) - .referrer(this.url.toExternalForm()) - .get(); + if (isStopped()) { + return; + } + String u = this.url.toExternalForm(); + Document doc = getDocument(u, u, null); Pattern p = Pattern.compile("^.*__fileurl = '([^']{1,})';.*$", Pattern.DOTALL); Matcher m = p.matcher(doc.outerHtml()); if (m.matches()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java index 768e2172..1bbb70ca 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java @@ -8,7 +8,6 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -47,9 +46,7 @@ public class NfsfwRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + albumDoc = getDocument(url); } String title = albumDoc.select("h2").first().text().trim(); return "nfsfw_" + Utils.filesystemSafe(title); @@ -90,12 +87,13 @@ public class NfsfwRipper extends AlbumRipper { sendUpdate(STATUS.LOADING_RESOURCE, nextURL); logger.info(" Retrieving " + nextURL); if (albumDoc == null) { - albumDoc = Jsoup.connect(nextURL) - 
.userAgent(USER_AGENT) - .get(); + albumDoc = getDocument(nextURL); } // Subalbums for (Element suba : albumDoc.select("td.IMG > a")) { + if (isStopped()) { + break; + } String subURL = "http://nfsfw.com" + suba.attr("href"); String subdir = subURL; while (subdir.endsWith("/")) { @@ -106,6 +104,9 @@ public class NfsfwRipper extends AlbumRipper { } // Images for (Element thumb : albumDoc.select("td.giItemCell > div > a")) { + if (isStopped()) { + break; + } String imagePage = "http://nfsfw.com" + thumb.attr("href"); try { NfsfwImageThread t = new NfsfwImageThread(new URL(imagePage), nextSubalbum, ++index); @@ -155,11 +156,8 @@ public class NfsfwRipper extends AlbumRipper { @Override public void run() { try { - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .timeout(5000) - .referrer(this.url.toExternalForm()) - .get(); + String u = this.url.toExternalForm(); + Document doc = getDocument(u, u, null); Elements images = doc.select(".gbBlock img"); if (images.size() == 0) { logger.error("Failed to find image at " + this.url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java index 81962724..f236df0a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java @@ -11,7 +11,6 @@ import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Connection.Response; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -50,7 +49,7 @@ public class PhotobucketRipper extends AlbumRipper { try { // Attempt to use album title as GID if (pageResponse == null) { - pageResponse = Jsoup.connect(url.toExternalForm()).execute(); + pageResponse = getResponse(url); } Document albumDoc = pageResponse.parse(); Elements els = albumDoc.select("div.libraryTitle > h1"); @@ -132,7 +131,7 @@ public class PhotobucketRipper extends AlbumRipper { if (pageIndex > 1 || pageResponse == null) { url = theUrl + String.format("?sort=3&page=", pageIndex); logger.info(" Retrieving " + url); - pageResponse = Jsoup.connect(url).execute(); + pageResponse = getResponse(url); } Document albumDoc = pageResponse.parse(); // Retrieve JSON from request @@ -190,10 +189,7 @@ public class PhotobucketRipper extends AlbumRipper { + "&json=1"; try { logger.info("Loading " + apiUrl); - Document doc = Jsoup.connect(apiUrl) - .ignoreContentType(true) - .referrer(url) - .get(); + Document doc = getDocument(apiUrl, true); String jsonString = doc.body().html().replace(""", "\""); JSONObject json = new JSONObject(jsonString); JSONArray subalbums = json.getJSONObject("body").getJSONArray("subAlbums"); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java index c1ad99b8..a228a4ee 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java @@ -49,10 +49,7 @@ public class PornhubRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving " + url.toExternalForm()); sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - albumDoc = Jsoup.connect(url.toExternalForm()) - .userAgent(USER_AGENT) - .timeout(TIMEOUT) - .get(); + albumDoc = getDocument(url); } Elements elems = 
albumDoc.select(".photoAlbumTitleV2"); return HOST + "_" + elems.get(0).text(); @@ -82,7 +79,7 @@ public class PornhubRipper extends AlbumRipper { @Override public void rip() throws IOException { - int index = 0, retries = 3; + int index = 0; String nextUrl = this.url.toExternalForm(); if (isStopped()) { @@ -92,11 +89,7 @@ public class PornhubRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = Jsoup.connect(nextUrl) - .userAgent(USER_AGENT) - .timeout(TIMEOUT) - .referrer(this.url.toExternalForm()) - .get(); + albumDoc = getDocument(nextUrl, this.url.toExternalForm(), null); } // Find thumbnails @@ -139,14 +132,11 @@ public class PornhubRipper extends AlbumRipper { private class PornhubImageThread extends Thread { private URL url; private int index; - private File workingDir; - private int retries = 3; public PornhubImageThread(URL url, int index, File workingDir) { super(); this.url = url; this.index = index; - this.workingDir = workingDir; } @Override @@ -156,11 +146,8 @@ public class PornhubRipper extends AlbumRipper { private void fetchImage() { try { - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .timeout(TIMEOUT) - .referrer(this.url.toExternalForm()) - .get(); + String u = this.url.toExternalForm(); + Document doc = getDocument(u, u, null); // Find image Elements images = doc.select("#photoImageSection img"); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java index 1c4a9f3b..b36d115b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java @@ -11,7 +11,6 @@ import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; import org.json.JSONTokener; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import com.rarchives.ripme.ripper.AlbumRipper; @@ -118,10 +117,7 @@ public class RedditRipper extends AlbumRipper { logger.info(" Retrieving " + url); while(doc == null && attempts++ < 3) { try { - doc= Jsoup.connect(url.toExternalForm()) - .ignoreContentType(true) - .userAgent(USER_AGENT) - .get(); + doc = getResponse(url, true).parse(); } catch(SocketTimeoutException ex) { if(attempts >= 3) throw ex; logger.warn(String.format("[!] 
Connection timed out (attempt %d)", attempts)); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SeeniveRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SeeniveRipper.java index 161832ea..98340c6a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/SeeniveRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SeeniveRipper.java @@ -7,12 +7,14 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.json.JSONObject; +import org.jsoup.Connection.Method; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; +import com.rarchives.ripme.ui.RipStatusMessage.STATUS; public class SeeniveRipper extends AlbumRipper { @@ -40,14 +42,16 @@ public class SeeniveRipper extends AlbumRipper { public void rip() throws IOException { String baseURL = this.url.toExternalForm(); logger.info(" Retrieving " + baseURL); - Document doc = Jsoup.connect(baseURL) - .header("Referer", baseURL) - .userAgent(USER_AGENT) - .timeout(5000) - .get(); + Document doc = getDocument(baseURL, baseURL, null); while (true) { + if (isStopped()) { + break; + } String lastID = null; for (Element element : doc.select("a.facebox")) { + if (isStopped()) { + break; + } String card = element.attr("href"); // "/v/" URL videoURL = new URL("https://seenive.com" + card); SeeniveImageThread vit = new SeeniveImageThread(videoURL); @@ -66,11 +70,7 @@ public class SeeniveRipper extends AlbumRipper { } logger.info("[ ] Retrieving " + baseURL + "/next/" + lastID); - String jsonString = Jsoup.connect(baseURL + "/next/" + lastID) - .header("Referer", baseURL) - .userAgent(USER_AGENT) - .ignoreContentType(true) - .execute().body(); + String jsonString = getResponse(baseURL + "/next/" + lastID, Method.GET, USER_AGENT, baseURL, null, true).body(); JSONObject json = new JSONObject(jsonString); String html = json.getString("Html"); if (html.equals("")) { @@ -111,10 +111,9 @@ public class SeeniveRipper extends AlbumRipper { @Override public void run() { try { - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + Document doc = getDocument(this.url); logger.info("[ ] Retreiving video page " + this.url); + sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); for (Element element : doc.select("source")) { String video = element.attr("src"); synchronized (threadPool) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java index 9028c9c8..a9673a78 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java @@ -6,7 +6,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -38,16 +37,16 @@ public class SmuttyRipper extends AlbumRipper { String url, tag = getGID(this.url); boolean hasNextPage = true; while (hasNextPage) { + if (isStopped()) { + break; + } page++; url = "http://smutty.com/h/" + tag + "/?q=%23" + tag + "&page=" + page + "&sort=date&lazy=1"; this.sendUpdate(STATUS.LOADING_RESOURCE, url); logger.info(" Retrieving " + url); Document doc; try { - doc = Jsoup.connect(url) - .userAgent(USER_AGENT) - .ignoreContentType(true) - .get(); + doc = getResponse(url, true).parse(); } catch (IOException 
             } catch (IOException e) {
                 if (e.toString().contains("Status=404")) {
                     logger.info("No more pages to load");
@@ -57,6 +56,9 @@
                 break;
             }
             for (Element image : doc.select("a.l > img")) {
+                if (isStopped()) {
+                    break;
+                }
                 String imageUrl = image.attr("src");
 
                 // Construct direct link to image based on thumbnail
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SupertangasRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SupertangasRipper.java
index 0e1be085..a6475132 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/SupertangasRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SupertangasRipper.java
@@ -7,7 +7,6 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.jsoup.HttpStatusException;
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
@@ -48,10 +47,7 @@ public class SupertangasRipper extends AlbumRipper {
             try {
                 logger.info(" Retrieving " + theURL);
                 sendUpdate(STATUS.LOADING_RESOURCE, theURL);
-                doc = Jsoup.connect(theURL)
-                           .userAgent(USER_AGENT)
-                           .timeout(5 * 1000)
-                           .get();
+                doc = getDocument(theURL);
             } catch (HttpStatusException e) {
                 logger.debug("Hit end of pages at page " + page, e);
                 break;
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java
index 430def66..84959966 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java
@@ -6,12 +6,12 @@ import java.net.URL;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 
 import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Utils;
 
 public class TeenplanetRipper extends AlbumRipper {
@@ -38,7 +38,7 @@
         try {
             // Attempt to use album title as GID
             if (albumDoc == null) {
-                albumDoc = Jsoup.connect(url.toExternalForm()).get();
+                albumDoc = getDocument(url);
             }
             Elements elems = albumDoc.select("div.header > h2");
             return HOST + "_" + elems.get(0).text();
@@ -68,9 +68,10 @@
     @Override
     public void rip() throws IOException {
         int index = 0;
-        logger.info(" Retrieving " + this.url.toExternalForm());
+        logger.info("Retrieving " + this.url);
+        sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
         if (albumDoc == null) {
-            albumDoc = Jsoup.connect(this.url.toExternalForm()).get();
+            albumDoc = getDocument(url);
         }
         for (Element thumb : albumDoc.select("#galleryImages > a > img")) {
             if (!thumb.hasAttr("src")) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java
index 90873f08..a379f367 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java
@@ -9,10 +9,10 @@ import java.util.regex.Pattern;
 import org.apache.commons.lang.StringUtils;
 import org.json.JSONArray;
 import org.json.JSONObject;
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 
 import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Utils;
 
 public class TumblrRipper extends AlbumRipper {
@@ -66,10 +66,7 @@
         checkURL += url.getHost();
         checkURL += "/info?api_key=" + API_KEY;
         try {
-            Document doc = Jsoup.connect(checkURL)
-                                .ignoreContentType(true)
-                                .userAgent(USER_AGENT)
-                                .get();
+            Document doc = getResponse(checkURL, true).parse();
             String jsonString = doc.body().html().replaceAll("&quot;", "\"");
             JSONObject json = new JSONObject(jsonString);
             int status = json.getJSONObject("meta").getInt("status");
@@ -90,18 +87,18 @@
         }
         int offset;
         for (String mediaType : mediaTypes) {
+            if (isStopped()) {
+                break;
+            }
             offset = 0;
             while (true) {
                 if (isStopped()) {
                     break;
                 }
                 String apiURL = getTumblrApiURL(mediaType, offset);
-                logger.info(" Retrieving " + apiURL);
-                Document doc = Jsoup.connect(apiURL)
-                                    .ignoreContentType(true)
-                                    .timeout(10 * 1000)
-                                    .header("User-agent", USER_AGENT)
-                                    .get();
+                logger.info("Retrieving " + apiURL);
+                sendUpdate(STATUS.LOADING_RESOURCE, apiURL);
+                Document doc = getResponse(apiURL, true).parse();
                 try {
                     Thread.sleep(1000);
                 } catch (InterruptedException e) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VidbleRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VidbleRipper.java
index 728c7f78..6479b31c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/VidbleRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VidbleRipper.java
@@ -6,7 +6,6 @@ import java.net.URL;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.select.Elements;
 
@@ -47,10 +46,10 @@ public class VidbleRipper extends AlbumRipper {
 
     @Override
     public void rip() throws IOException {
-        logger.info(" Retrieving " + this.url.toExternalForm());
+        logger.info("Retrieving " + this.url);
         sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
         if (albumDoc == null) {
-            albumDoc = Jsoup.connect(this.url.toExternalForm()).get();
+            albumDoc = getDocument(this.url);
         }
         Elements els = albumDoc.select("#ContentPlaceHolder1_thumbs");
         if (els.size() == 0) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VineRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VineRipper.java
index eaac39a7..0673c86f 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/VineRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VineRipper.java
@@ -9,7 +9,6 @@ import java.util.regex.Pattern;
 import org.json.JSONArray;
 import org.json.JSONObject;
 import org.jsoup.HttpStatusException;
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 
 import com.rarchives.ripme.ripper.AlbumRipper;
@@ -48,10 +47,7 @@ public class VineRipper extends AlbumRipper {
             try {
                 logger.info(" Retrieving " + theURL);
                 sendUpdate(STATUS.LOADING_RESOURCE, theURL);
-                doc = Jsoup.connect(theURL)
-                           .ignoreContentType(true)
-                           .timeout(5 * 1000)
-                           .get();
+                doc = getResponse(theURL, true).parse();
             } catch (HttpStatusException e) {
                 logger.debug("Hit end of pages at page " + page, e);
                 break;
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VineboxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VineboxRipper.java
index 66bc99eb..2d5776b5 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/VineboxRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VineboxRipper.java
@@ -7,11 +7,11 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.jsoup.HttpStatusException;
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 
 import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 
 public class VineboxRipper extends AlbumRipper {
 
@@ -39,9 +39,10 @@ public class VineboxRipper extends AlbumRipper {
         while (true) {
             page++;
             String urlPaged = this.url.toExternalForm() + "?page=" + page;
-            logger.info(" Retrieving " + urlPaged);
+            logger.info("Retrieving " + urlPaged);
+            sendUpdate(STATUS.LOADING_RESOURCE, urlPaged);
             try {
-                doc = Jsoup.connect(urlPaged).get();
+                doc = getDocument(urlPaged);
             } catch (HttpStatusException e) {
                 logger.debug("Hit end of pages at page " + page, e);
                 break;
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java
index b6d57cde..2a1a43dc 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/XhamsterRipper.java
@@ -6,7 +6,6 @@ import java.net.URL;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 
@@ -38,7 +37,7 @@ public class XhamsterRipper extends AlbumRipper {
         String nextURL = this.url.toExternalForm();
         while (nextURL != null) {
             logger.info(" Retrieving " + nextURL);
-            Document doc = Jsoup.connect(nextURL).get();
+            Document doc = getDocument(nextURL);
             for (Element thumb : doc.select("table.iListing div.img img")) {
                 if (!thumb.hasAttr("src")) {
                     continue;
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/BeegRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/BeegRipper.java
index 02fab55e..45d4c7d6 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/BeegRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/BeegRipper.java
@@ -8,7 +8,6 @@ import java.util.regex.Pattern;
 
 import org.json.JSONException;
 import org.json.JSONObject;
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 
 import com.rarchives.ripme.ripper.VideoRipper;
@@ -54,10 +53,8 @@ public class BeegRipper extends VideoRipper {
 
     @Override
     public void rip() throws IOException {
-        logger.info(" Retrieving " + this.url.toExternalForm());
-        Document doc = Jsoup.connect(this.url.toExternalForm())
-                            .userAgent(USER_AGENT)
-                            .get();
+        logger.info(" Retrieving " + this.url);
+        Document doc = getDocument(this.url);
         Pattern p = Pattern.compile("^.*var qualityArr = (.*});.*$", Pattern.DOTALL);
         Matcher m = p.matcher(doc.html());
         if (m.matches()) {
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/GfycatRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/GfycatRipper.java
index 4b9c6455..d8427d30 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/GfycatRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/GfycatRipper.java
@@ -6,7 +6,6 @@ import java.net.URL;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.select.Elements;
 
@@ -52,9 +51,7 @@ public class GfycatRipper extends VideoRipper {
     @Override
     public void rip() throws IOException {
         logger.info(" Retrieving " + this.url.toExternalForm());
-        Document doc = Jsoup.connect(this.url.toExternalForm())
-                            .userAgent(USER_AGENT)
-                            .get();
+        Document doc = getDocument(this.url);
doc.select("source#mp4source"); if (videos.size() == 0) { throw new IOException("Could not find source#mp4source at " + url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/PornhubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/PornhubRipper.java index c2f79a45..6eea2170 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/PornhubRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/PornhubRipper.java @@ -57,9 +57,7 @@ public class PornhubRipper extends VideoRipper { @Override public void rip() throws IOException { logger.info(" Retrieving " + this.url.toExternalForm()); - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + Document doc = getDocument(this.url); Pattern p = Pattern.compile("^.*var flashvars = (.*});.*$", Pattern.DOTALL); Matcher m = p.matcher(doc.body().html()); if (m.matches()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/ViddmeRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/ViddmeRipper.java index 6cdffe93..c2e86a96 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/ViddmeRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/ViddmeRipper.java @@ -6,7 +6,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; @@ -54,9 +53,7 @@ public class ViddmeRipper extends VideoRipper { @Override public void rip() throws IOException { logger.info(" Retrieving " + this.url.toExternalForm()); - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + Document doc = getDocument(this.url); Elements videos = doc.select("meta[name=twitter:player:stream]"); if (videos.size() == 0) { throw new IOException("Could not find twitter:player:stream at " + url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/VineRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/VineRipper.java index ca75e523..002c26d4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/VineRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/VineRipper.java @@ -6,7 +6,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; @@ -55,9 +54,7 @@ public class VineRipper extends VideoRipper { @Override public void rip() throws IOException { logger.info(" Retrieving " + this.url.toExternalForm()); - Document doc = Jsoup.connect(this.url.toExternalForm()) - .userAgent(USER_AGENT) - .get(); + Document doc = getDocument(this.url); Elements props = doc.select("meta[property=twitter:player:stream]"); if (props.size() == 0) { throw new IOException("Could not find meta property 'twitter:player:stream' at " + url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java index 752872c6..437aeff1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/XvideosRipper.java @@ -7,7 +7,6 @@ import java.net.URLDecoder; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ 
@@ -55,10 +54,8 @@ public class XvideosRipper extends VideoRipper {
 
     @Override
     public void rip() throws IOException {
-        logger.info(" Retrieving " + this.url.toExternalForm());
-        Document doc = Jsoup.connect(this.url.toExternalForm())
-                            .userAgent(USER_AGENT)
-                            .get();
+        logger.info(" Retrieving " + this.url);
+        Document doc = getDocument(this.url);
         Elements embeds = doc.select("embed");
         if (embeds.size() == 0) {
             throw new IOException("Could not find Embed code at " + url);
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/video/YoupornRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/video/YoupornRipper.java
index d4613856..fee63459 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/video/YoupornRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/video/YoupornRipper.java
@@ -54,10 +54,8 @@ public class YoupornRipper extends VideoRipper {
 
     @Override
     public void rip() throws IOException {
-        logger.info(" Retrieving " + this.url.toExternalForm());
-        Document doc = Jsoup.connect(this.url.toExternalForm())
-                            .userAgent(USER_AGENT)
-                            .get();
+        logger.info(" Retrieving " + this.url);
+        Document doc = getDocument(this.url);
         Elements videos = doc.select("video");
         if (videos.size() == 0) {
             throw new IOException("Could not find Embed code at " + url);
diff --git a/src/main/resources/rip.properties b/src/main/resources/rip.properties
index 5192f99b..1a0ff01c 100644
--- a/src/main/resources/rip.properties
+++ b/src/main/resources/rip.properties
@@ -11,6 +11,9 @@ download.retries = 1
 
 # File download timeout (in milliseconds)
 download.timeout = 60000
 
+# Page download timeout (in milliseconds)
+page.timeout = 5000
+
 # Maximum size of downloaded files in bytes (required)
 download.max_size = 104857600
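
Editor's note (not part of the patch): every ripper above is converted the same way -- drop the per-site Jsoup.connect(...) chain, fetch through the shared getDocument/getResponse helpers (which apply the common user agent and the new page.timeout setting), and poll isStopped() inside paging loops so a rip can be cancelled. The following is a minimal, hypothetical sketch of a ripper written against that convention; ExampleRipper, the CSS selectors, and the addURLToDownload/waitForThreads calls are illustrative assumptions, not code from this commit.

    // Illustrative sketch only. Assumes the helper names used by the converted
    // rippers above (getDocument, isStopped, sendUpdate) plus the usual
    // AlbumRipper queueing helpers; the site and selectors are made up.
    import java.io.IOException;
    import java.net.URL;

    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    import com.rarchives.ripme.ripper.AlbumRipper;
    import com.rarchives.ripme.ui.RipStatusMessage.STATUS;

    public class ExampleRipper extends AlbumRipper { // hypothetical ripper

        public ExampleRipper(URL url) throws IOException {
            super(url);
        }

        @Override
        public void rip() throws IOException {
            String nextURL = this.url.toExternalForm();
            while (nextURL != null && !isStopped()) {
                sendUpdate(STATUS.LOADING_RESOURCE, nextURL);
                // Centralized fetch: user agent and page.timeout are handled in AbstractRipper
                Document doc = getDocument(nextURL);
                for (Element img : doc.select("img.full")) {      // selector is made up
                    addURLToDownload(new URL(img.attr("src")));   // assumed AlbumRipper helper
                }
                Element next = doc.select("a.next").first();      // selector is made up
                nextURL = (next == null) ? null : next.attr("abs:href");
            }
            waitForThreads(); // assumed AlbumRipper helper
        }

        // The remaining abstract methods required by AlbumRipper (host/GID handling,
        // URL validation) are omitted here for brevity.
    }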