diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractMultiPageRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractMultiPageRipper.java
new file mode 100644
index 00000000..67142866
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractMultiPageRipper.java
@@ -0,0 +1,109 @@
+package com.rarchives.ripme.ripper;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.List;
+
+import org.jsoup.nodes.Document;
+
+import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
+import com.rarchives.ripme.utils.Utils;
+
+/**
+ * Base class for rippers that walk an album one HTML page at a time.
+ * Subclasses supply page retrieval and URL extraction; rip() drives the
+ * paging loop and passes every extracted URL to downloadURL().
+ */
+public abstract class AbstractMultiPageRipper extends AlbumRipper {
+
+    public AbstractMultiPageRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    public abstract String getDomain();
+    public abstract String getHost();
+
+    /** Returns the first page of the album. */
+    public abstract Document getFirstPage() throws IOException;
+    /** Returns the page following 'doc'; rip() stops paging on null or IOException. */
+    public abstract Document getNextPage(Document doc) throws IOException;
+    /** Returns the image URLs found on 'page', in the order they should be numbered. */
+    public abstract List<String> getURLsFromPage(Document page);
+    /** Handles one image URL; 'index' counts images across all pages, starting at 1. */
+    public abstract void downloadURL(URL url, int index);
+
+    /** Whether getPrefix() may number files; subclasses override to opt out. */
+    public boolean keepSortOrder() {
+        return true;
+    }
+
+    /** Accepts any URL whose host ends with getDomain(). */
+    @Override
+    public boolean canRip(URL url) {
+        return url.getHost().endsWith(getDomain());
+    }
+
+    /** Returns the URL unchanged; subclasses override to normalize it. */
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        return url;
+    }
+
+    /**
+     * Rips every page of the album: loads the first page, hands each URL
+     * from getURLsFromPage() to downloadURL(), then follows getNextPage()
+     * until it returns null or throws.
+     * @throws IOException If the first page cannot be loaded, a page yields
+     *         no image URLs, or an extracted URL is malformed.
+     */
+    @Override
+    public void rip() throws IOException {
+        int index = 0;
+        logger.info("Retrieving " + this.url);
+        sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
+        Document doc = getFirstPage();
+
+        while (doc != null) {
+            List<String> imageURLs = getURLsFromPage(doc);
+
+            if (imageURLs.isEmpty()) {
+                throw new IOException("No images found at " + this.url);
+            }
+
+            for (String imageURL : imageURLs) {
+                if (isStopped()) {
+                    break;
+                }
+                index += 1;
+                downloadURL(new URL(imageURL), index);
+            }
+            if (isStopped()) {
+                // A stop request ends the whole rip, not just this page:
+                // leave the paging loop instead of fetching further pages.
+                logger.info("Interrupted");
+                break;
+            }
+            try {
+                doc = getNextPage(doc);
+            } catch (IOException e) {
+                logger.info("Can't get next page: " + e.getMessage());
+                break;
+            }
+        }
+        waitForThreads();
+    }
+
+    /**
+     * Returns the filename prefix for the image at 'index': a zero-padded
+     * "NNN_" when keepSortOrder() and the "download.save_order" setting are
+     * both true, otherwise the empty string.
+     */
+    public String getPrefix(int index) {
+        String prefix = "";
+        if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) {
+            prefix = String.format("%03d_", index);
+        }
+        return prefix;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index 1a1016a3..0aadec15 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -12,12 +12,7 @@ import java.util.Observable;
 
 import org.apache.log4j.FileAppender;
 import org.apache.log4j.Logger;
-import org.jsoup.Connection;
-import org.jsoup.Connection.Method;
-import org.jsoup.Connection.Response;
 import org.jsoup.HttpStatusException;
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
 
 import com.rarchives.ripme.ui.RipStatusHandler;
 import com.rarchives.ripme.ui.RipStatusMessage;
@@ -33,8 +28,6 @@ public abstract class AbstractRipper
     public static final String USER_AGENT =
         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:29.0) Gecko/20100101 Firefox/29.0";
 
-    public static final int TIMEOUT = Utils.getConfigInteger("page.timeout", 5 * 1000);
-
     protected URL url;
     protected File workingDir;
     protected DownloadThreadPool threadPool;
@@ -61,10 +54,6 @@ public abstract class AbstractRipper
         }
     }
 
-    protected int getTimeout() {
-        return TIMEOUT;
-    }
-
     /**
      * Ensures inheriting ripper can rip this URL, raises exception if not.
      * Otherwise initializes working directory and thread pool.
@@ -339,6 +328,16 @@ public abstract class AbstractRipper } } } + + public boolean sleep(int milliseconds) { + try { + Thread.sleep(milliseconds); + return true; + } catch (InterruptedException e) { + logger.error("Interrupted while waiting to load next page", e); + return false; + } + } public void setBytesTotal(int bytes) { // Do nothing @@ -348,82 +347,5 @@ public abstract class AbstractRipper } // Thar be overloaded methods afoot - public Document getDocument(URL url) throws IOException { - return getDocument(url.toExternalForm()); - } - public Document getDocument(String url) throws IOException { - return getResponse(url).parse(); - } - public Document getDocument(String url, boolean ignoreContentType) throws IOException { - return getResponse(url, ignoreContentType).parse(); - } - public Document getDocument(String url, Map cookies) throws IOException { - return getResponse(url, cookies).parse(); - } - public Document getDocument(String url, String referrer, Map cookies) throws IOException { - return getResponse(url, Method.GET, referrer, cookies).parse(); - } - public Response getResponse(String url) throws IOException { - return getResponse(url, Method.GET, USER_AGENT, null, null, false); - } - public Response getResponse(URL url) throws IOException { - return getResponse(url.toExternalForm()); - } - public Response getResponse(String url, String referrer) throws IOException { - return getResponse(url, Method.GET, USER_AGENT, referrer, null, false); - } - public Response getResponse(URL url, boolean ignoreContentType) throws IOException { - return getResponse(url.toExternalForm(), Method.GET, USER_AGENT, null, null, ignoreContentType); - } - public Response getResponse(String url, Map cookies) throws IOException { - return getResponse(url, Method.GET, USER_AGENT, cookies); - } - public Response getResponse(String url, String referrer, Map cookies) throws IOException { - return getResponse(url, Method.GET, referrer, cookies); - } - public Response 
getResponse(String url, Method method) throws IOException { - return getResponse(url, method, USER_AGENT, null, null, false); - } - public Response getResponse(String url, Method method, String referrer, Map cookies) throws IOException { - return getResponse(url, method, USER_AGENT, referrer, cookies, false); - } - public Response getResponse(String url, boolean ignoreContentType) throws IOException { - return getResponse(url, Method.GET, USER_AGENT, null, null, ignoreContentType); - } - public Response getResponse(String url, Method method, boolean ignoreContentType) throws IOException { - return getResponse(url, method, USER_AGENT, null, null, false); - } - - public Response getResponse(String url, - Method method, - String userAgent, - String referrer, - Map cookies, - boolean ignoreContentType) - throws IOException { - Connection connection = Jsoup.connect(url); - - connection.method( (method == null) ? Method.GET : method); - connection.userAgent( (userAgent == null) ? USER_AGENT : userAgent); - connection.ignoreContentType(ignoreContentType); - connection.timeout(getTimeout()); - connection.maxBodySize(0); - - if (cookies != null) { connection.cookies(cookies); } - if (referrer != null) { connection.referrer(referrer); } - - Response response = null; - int retries = Utils.getConfigInteger("download.retries", 1);; - while (retries >= 0) { - retries--; - try { - response = connection.execute(); - } catch (IOException e) { - logger.warn("Error while loading " + url, e); - continue; - } - } - return response; - } } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java new file mode 100644 index 00000000..8095ef11 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java @@ -0,0 +1,70 @@ +package com.rarchives.ripme.ripper; + +import java.io.IOException; +import java.net.MalformedURLException; +import 
java.net.URL; +import java.util.List; + +import org.jsoup.nodes.Document; + +import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Utils; + +public abstract class AbstractSinglePageRipper extends AlbumRipper { + + public AbstractSinglePageRipper(URL url) throws IOException { + super(url); + } + + public abstract String getDomain(); + public abstract String getHost(); + + public abstract Document getFirstPage() throws IOException; + public abstract List getURLsFromPage(Document page); + public abstract void downloadURL(URL url, int index); + + public boolean keepSortOrder() { + return false; + } + + @Override + public boolean canRip(URL url) { + return url.getHost().endsWith(getDomain()); + } + + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + return url; + } + + @Override + public void rip() throws IOException { + int index = 0; + logger.info("Retrieving " + this.url); + sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); + Document doc = getFirstPage(); + List imageURLs = getURLsFromPage(doc); + + if (imageURLs.size() == 0) { + throw new IOException("No images found at " + this.url); + } + + for (String imageURL : imageURLs) { + if (isStopped()) { + logger.info("Interrupted"); + break; + } + index += 1; + downloadURL(new URL(imageURL), index); + } + waitForThreads(); + } + + public String getPrefix(int index) { + String prefix = ""; + if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) { + prefix = String.format("%03d_", index); + } + return prefix; + } +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java index 8a2adb83..ee2f699a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java @@ -3,22 +3,18 @@ package 
com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.utils.Utils; +import com.rarchives.ripme.ripper.AbstractSinglePageRipper; +import com.rarchives.ripme.utils.Http; -public class ButttoucherRipper extends AlbumRipper { - - private static final String DOMAIN = "butttoucher.com", - HOST = "butttoucher"; - - private Document albumDoc = null; +public class ButttoucherRipper extends AbstractSinglePageRipper { public ButttoucherRipper(URL url) throws IOException { super(url); @@ -26,9 +22,13 @@ public class ButttoucherRipper extends AlbumRipper { @Override public String getHost() { - return HOST; + return "butttoucher"; } - + @Override + public String getDomain() { + return "butttoucher.com"; + } + @Override public String getGID(URL url) throws MalformedURLException { Pattern p; Matcher m; @@ -45,38 +45,26 @@ public class ButttoucherRipper extends AlbumRipper { } @Override - public void rip() throws IOException { - logger.info("Retrieving " + this.url); - if (albumDoc == null) { - albumDoc = getDocument(this.url); - } - int index = 0; - for (Element thumb : albumDoc.select("div.image-gallery > a > img")) { + public Document getFirstPage() throws IOException { + return Http.url(this.url).get(); + } + + @Override + public List getURLsFromPage(Document page) { + List thumbs = new ArrayList(); + for (Element thumb : page.select("div.image-gallery > a > img")) { if (!thumb.hasAttr("src")) { continue; } String smallImage = thumb.attr("src"); - String image = smallImage.replace("m.", "."); - index += 1; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - 
} - addURLToDownload(new URL(image), prefix); + thumbs.add(smallImage.replace("m.", ".")); } - waitForThreads(); - } - - public boolean canRip(URL url) { - if (!url.getHost().endsWith(DOMAIN)) { - return false; - } - return true; + return thumbs; } @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); } } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java index d63fce03..03c32d1b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java @@ -3,18 +3,18 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; -import java.util.HashSet; -import java.util.Set; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.utils.Utils; +import com.rarchives.ripme.ripper.AbstractSinglePageRipper; +import com.rarchives.ripme.utils.Http; -public class ChanRipper extends AlbumRipper { +public class ChanRipper extends AbstractSinglePageRipper { public ChanRipper(URL url) throws IOException { super(url); @@ -43,10 +43,6 @@ public class ChanRipper extends AlbumRipper { || url.toExternalForm().contains("/thread/")); // 4chan } - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - @Override public String getGID(URL url) throws MalformedURLException { Pattern p; Matcher m; @@ -74,13 +70,20 @@ public class ChanRipper extends AlbumRipper { } @Override - public void rip() throws IOException { - Set attempted = new HashSet(); - int index = 0; + public 
String getDomain() { + return this.url.getHost(); + } + + @Override + public Document getFirstPage() throws IOException { + return Http.url(this.url).get(); + } + + @Override + public List getURLsFromPage(Document page) { + List imageURLs = new ArrayList(); Pattern p; Matcher m; - logger.info("Retrieving " + this.url); - Document doc = getDocument(this.url); - for (Element link : doc.select("a")) { + for (Element link : page.select("a")) { if (!link.hasAttr("href")) { continue; } @@ -104,20 +107,20 @@ public class ChanRipper extends AlbumRipper { if (image.startsWith("/")) { image = "http://" + this.url.getHost() + image; } - if (attempted.contains(image)) { + // Don't download the same URL twice + if (imageURLs.contains(image)) { logger.debug("Already attempted: " + image); continue; } - index += 1; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(image), prefix); - attempted.add(image); + imageURLs.add(image); } } - waitForThreads(); + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); } } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index d0e1407f..a221ee15 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -6,148 +6,152 @@ import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import 
org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.ripper.AbstractMultiPageRipper; import com.rarchives.ripme.utils.Base64; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class DeviantartRipper extends AlbumRipper { - - private static final String DOMAIN = "deviantart.com", - HOST = "deviantart"; +public class DeviantartRipper extends AbstractMultiPageRipper { private static final int SLEEP_TIME = 2000; private Map cookies = new HashMap(); + private Set triedURLs = new HashSet(); public DeviantartRipper(URL url) throws IOException { super(url); } @Override - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + public String getHost() { + return "deviantart"; + } + @Override + public String getDomain() { + return "deviantart.com"; } @Override public URL sanitizeURL(URL url) throws MalformedURLException { String u = url.toExternalForm(); - u = u.replaceAll("\\?.*", ""); + String subdir = "/"; + if (u.contains("catpath=scraps")) { + subdir = "scraps"; + } + u = u.replaceAll("\\?.*", "?catpath=" + subdir); return new URL(u); } @Override - public void rip() throws IOException { - int index = 0; - String nextURL = this.url.toExternalForm(); + public String getGID(URL url) throws MalformedURLException { + Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]+)\\.deviantart\\.com(/gallery)?/?(\\?.*)?$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + // Root gallery + if (url.toExternalForm().contains("catpath=scraps")) { + return m.group(1) + "_scraps"; + } + else { + return m.group(1); + } + } + p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + // Subgallery + return m.group(1) + "_" + m.group(2); + } + throw new 
MalformedURLException("Expected URL format: http://username.deviantart.com/[/gallery/#####], got: " + url); + } + @Override + public Document getFirstPage() throws IOException { // Login try { cookies = loginToDeviantart(); } catch (Exception e) { logger.warn("Failed to login: ", e); } + return Http.url(this.url) + .cookies(cookies) + .get(); + } - // Iterate over every page - while (nextURL != null) { + @Override + public List getURLsFromPage(Document page) { + List imageURLs = new ArrayList(); - logger.info(" Retrieving " + nextURL); - sendUpdate(STATUS.LOADING_RESOURCE, "Retrieving " + nextURL); - Document doc = getDocument(nextURL, cookies); - - // Iterate over all thumbnails - for (Element thumb : doc.select("div.zones-container a.thumb")) { - if (isStopped()) { - break; - } - Element img = thumb.select("img").get(0); - if (img.attr("transparent").equals("false")) { - continue; // a.thumbs to other albums are invisible - } - - index++; - - String fullSize = null; - try { - fullSize = thumbToFull(img.attr("src"), true); - } catch (Exception e) { - logger.info("Attempting to get full size image from " + thumb.attr("href")); - fullSize = smallToFull(img.attr("src"), thumb.attr("href")); - if (fullSize == null) { - continue; - } - } - - try { - URL fullsizeURL = new URL(fullSize); - String imageId = fullSize.substring(fullSize.lastIndexOf('-') + 1); - imageId = imageId.substring(0, imageId.indexOf('.')); - long imageIdLong = alphaToLong(imageId); - addURLToDownload(fullsizeURL, String.format("%010d_", imageIdLong)); - } catch (MalformedURLException e) { - logger.error("[!] Invalid thumbnail image: " + fullSize); - continue; - } - } - - try { - Thread.sleep(SLEEP_TIME); - } catch (InterruptedException e) { - logger.error("[!] 
Interrupted while waiting for page to load", e); + // Iterate over all thumbnails + for (Element thumb : page.select("div.zones-container a.thumb")) { + if (isStopped()) { break; } - - // Find the next page - nextURL = null; - for (Element nextButton : doc.select("a.away")) { - if (nextButton.attr("href").contains("offset=" + index)) { - nextURL = this.url.toExternalForm() + "?offset=" + index; - } + Element img = thumb.select("img").get(0); + if (img.attr("transparent").equals("false")) { + continue; // a.thumbs to other albums are invisible } - if (nextURL == null) { - logger.info("No next button found"); + + // Get full-sized image via helper methods + String fullSize = null; + try { + fullSize = thumbToFull(img.attr("src"), true); + } catch (Exception e) { + logger.info("Attempting to get full size image from " + thumb.attr("href")); + fullSize = smallToFull(img.attr("src"), thumb.attr("href")); } + + if (fullSize == null) { + continue; + } + if (triedURLs.contains(fullSize)) { + logger.warn("Already tried to download " + fullSize); + continue; + } + triedURLs.add(fullSize); + imageURLs.add(fullSize); } - waitForThreads(); + return imageURLs; + } + + @Override + public Document getNextPage(Document page) throws IOException { + Elements nextButtons = page.select("li.next > a"); + if (nextButtons.size() == 0) { + throw new IOException("No next page found"); + } + Element a = nextButtons.first(); + if (a.hasClass("disabled")) { + throw new IOException("Hit end of pages"); + } + String nextPage = a.attr("href"); + if (nextPage.startsWith("/")) { + nextPage = "http://" + this.url.getHost() + nextPage; + } + if (!sleep(SLEEP_TIME)) { + throw new IOException("Interrupted while waiting to load next page: " + nextPage); + } + logger.info("Found next page: " + nextPage); + return Http.url(nextPage) + .cookies(cookies) + .get(); } - /** - * Convert alpha-numeric string into a corresponding number - * @param alpha String to convert - * @return Numeric representation of 
'alpha' - */ - public static long alphaToLong(String alpha) { - long result = 0; - for (int i = 0; i < alpha.length(); i++) { - result += charToInt(alpha, i); - } - return result; - } - - /** - * Convert character at index in a string 'text' to numeric form (base-36) - * @param text Text to retrieve the character from - * @param index Index of the desired character - * @return Number representing character at text[index] - */ - private static int charToInt(String text, int index) { - char c = text.charAt(text.length() - index - 1); - c = Character.toLowerCase(c); - int number = "0123456789abcdefghijklmnopqrstuvwxyz".indexOf(c); - number *= Math.pow(36, index); - return number; + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies); } /** @@ -163,7 +167,6 @@ public class DeviantartRipper extends AlbumRipper { fields.remove(4); if (!fields.get(4).equals("f") && throwException) { // Not a full-size image - logger.warn("Can't get full size image from " + thumb); throw new Exception("Can't get full size image from " + thumb); } StringBuilder result = new StringBuilder(); @@ -187,27 +190,20 @@ public class DeviantartRipper extends AlbumRipper { public String smallToFull(String thumb, String page) { try { // Fetch the image page - Response resp = getResponse(page, Method.GET, USER_AGENT, this.url.toExternalForm(), cookies, false); - Map cookies = resp.cookies(); - cookies.putAll(this.cookies); + Response resp = Http.url(page) + .referrer(this.url) + .cookies(cookies) + .response(); + cookies.putAll(resp.cookies()); // Try to find the "Download" box Elements els = resp.parse().select("a.dev-page-download"); if (els.size() == 0) { - throw new IOException("no download page found"); + throw new IOException("No download page found"); } // Full-size image String fsimage = els.get(0).attr("href"); - - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - String 
imageId = fsimage.substring(fsimage.lastIndexOf('-') + 1); - imageId = imageId.substring(0, imageId.indexOf('.')); - prefix = String.format("%010d_", alphaToLong(imageId)); - } - // Download it - addURLToDownload(new URL(fsimage), prefix, "", page, cookies); - return null; + return fsimage; } catch (IOException ioe) { try { logger.info("Failed to get full size download image at " + page + " : '" + ioe.getMessage() + "'"); @@ -220,28 +216,6 @@ public class DeviantartRipper extends AlbumRipper { } } - @Override - public String getHost() { - return HOST; - } - - @Override - public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com(/gallery)?/?$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - // Root gallery - return m.group(1); - } - p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - // Subgallery - return m.group(1) + "_" + m.group(2); - } - throw new MalformedURLException("Expected URL format: http://username.deviantart.com/[/gallery/#####], got: " + url); - } - /** * Logs into deviant art. Required to rip full-size NSFW content. * @return Map of cookies containing session data. 
@@ -254,7 +228,8 @@ public class DeviantartRipper extends AlbumRipper { if (username == null || password == null) { throw new IOException("could not find username or password in config"); } - Response resp = getResponse("http://www.deviantart.com/"); + Response resp = Http.url("http://www.deviantart.com/") + .response(); for (Element input : resp.parse().select("form#form-login input[type=hidden]")) { postData.put(input.attr("name"), input.attr("value")); } @@ -263,17 +238,17 @@ public class DeviantartRipper extends AlbumRipper { postData.put("remember_me", "1"); // Send login request - resp = Jsoup.connect("https://www.deviantart.com/users/login") + resp = Http.url("https://www.deviantart.com/users/login") .userAgent(USER_AGENT) .data(postData) .cookies(resp.cookies()) .method(Method.POST) - .execute(); + .response(); // Assert we are logged in if (resp.hasHeader("Location") && resp.header("Location").contains("password")) { // Wrong password - throw new IOException("Wrong pasword"); + throw new IOException("Wrong password"); } if (resp.url().toExternalForm().contains("bad_form")) { throw new IOException("Login form was incorrectly submitted"); @@ -285,5 +260,4 @@ public class DeviantartRipper extends AlbumRipper { // We are logged in, save the cookies return resp.cookies(); } - } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java index 69323bbc..3a313047 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java @@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -10,14 +12,10 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import 
org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; -import com.rarchives.ripme.utils.Utils; +import com.rarchives.ripme.ripper.AbstractMultiPageRipper; +import com.rarchives.ripme.utils.Http; -public class DrawcrowdRipper extends AlbumRipper { - - private static final String DOMAIN = "drawcrowd.com", - HOST = "drawcrowd"; +public class DrawcrowdRipper extends AbstractMultiPageRipper { public DrawcrowdRipper(URL url) throws IOException { super(url); @@ -25,14 +23,11 @@ public class DrawcrowdRipper extends AlbumRipper { @Override public String getHost() { - return HOST; + return "drawcrowd"; } - - /** - * Reformat given URL into the desired format (all images on single page) - */ - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; + @Override + public String getDomain() { + return "drawcrowd.com"; } @Override @@ -58,46 +53,39 @@ public class DrawcrowdRipper extends AlbumRipper { } @Override - public void rip() throws IOException { - int index = 0; - sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - logger.info("Retrieving " + this.url); - Document albumDoc = getDocument(this.url); - while (true) { - if (isStopped()) { - break; - } - for (Element thumb : albumDoc.select("div.item.asset img")) { - String image = thumb.attr("src"); - image = image - .replaceAll("/medium/", "/large/") - .replaceAll("/small/", "/large/"); - index++; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(image), prefix); - } - Elements loadMore = albumDoc.select("a#load-more"); - if (loadMore.size() == 0) { - break; - } - String nextURL = "http://drawcrowd.com" + loadMore.get(0).attr("href"); - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - logger.error("Interrupted while waiting to load next page", e); - throw new IOException(e); - } - 
sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - albumDoc = getDocument(nextURL); - } - waitForThreads(); + public Document getFirstPage() throws IOException { + return Http.url(this.url).get(); } - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + @Override + public Document getNextPage(Document doc) throws IOException { + Elements loadMore = doc.select("a#load-more"); + if (loadMore.size() == 0) { + throw new IOException("No next page found"); + } + if (!sleep(1000)) { + throw new IOException("Interrupted while waiting for next page"); + } + String nextPage = "http://drawcrowd.com" + loadMore.get(0).attr("href"); + return Http.url(nextPage).get(); + } + + @Override + public List getURLsFromPage(Document page) { + List imageURLs = new ArrayList(); + for (Element thumb : page.select("div.item.asset img")) { + String image = thumb.attr("src"); + image = image + .replaceAll("/medium/", "/large/") + .replaceAll("/small/", "/large/"); + imageURLs.add(image); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); } } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java index 75054ae1..26364cbf 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java @@ -16,6 +16,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class EHentaiRipper extends AlbumRipper { @@ -57,7 +58,9 @@ public class EHentaiRipper extends AlbumRipper { if (albumDoc == null) { sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); logger.info("Retrieving " + 
url); - albumDoc = getDocument(url.toExternalForm(), cookies); + albumDoc = Http.url(url) + .cookies(cookies) + .get(); } Elements elems = albumDoc.select("#gn"); return HOST + "_" + elems.get(0).text(); @@ -96,7 +99,10 @@ public class EHentaiRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = getDocument(nextUrl, this.url.toExternalForm(), cookies); + albumDoc = Http.url(nextUrl) + .referrer(this.url) + .cookies(cookies) + .get(); } // Check for rate limiting if (albumDoc.toString().contains("IP address will be automatically banned")) { @@ -197,8 +203,10 @@ public class EHentaiRipper extends AlbumRipper { private void fetchImage() { try { - String u = this.url.toExternalForm(); - Document doc = getDocument(u, u, cookies); + Document doc = Http.url(this.url) + .referrer(this.url) + .cookies(cookies) + .get(); // Check for rate limit if (doc.toString().contains("IP address will be automatically banned")) { if (this.retries == 0) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index 9597994b..d79e1f35 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -12,6 +12,7 @@ import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class EightmusesRipper extends AlbumRipper { @@ -40,7 +41,7 @@ public class EightmusesRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } Element titleElement = albumDoc.select("meta[name=description]").first(); String title = 
titleElement.attr("content"); @@ -63,7 +64,7 @@ public class EightmusesRipper extends AlbumRipper { logger.info(" Retrieving " + url); sendUpdate(STATUS.LOADING_RESOURCE, url); if (albumDoc == null) { - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } int index = 0; // Both album index and image index diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java index 6f9251d4..79185d49 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java @@ -11,6 +11,7 @@ import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class FapprovedRipper extends AlbumRipper { @@ -46,7 +47,9 @@ public class FapprovedRipper extends AlbumRipper { url = "http://fapproved.com/users/" + user + "/images?page=" + page; this.sendUpdate(STATUS.LOADING_RESOURCE, url); logger.info(" Retrieving " + url); - Document doc = getDocument(url, true); + Document doc = Http.url(url) + .ignoreContentType() + .get(); for (Element image : doc.select("div.actual-image img")) { String imageUrl = image.attr("src"); if (imageUrl.startsWith("//")) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java index e91879f1..00cf63a8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java @@ -20,6 +20,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.utils.Base64; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; 
public class FlickrRipper extends AlbumRipper { @@ -61,7 +62,7 @@ public class FlickrRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } String user = url.toExternalForm(); user = user.substring(user.indexOf("/photos/") + "/photos/".length()); @@ -124,7 +125,7 @@ public class FlickrRipper extends AlbumRipper { } logger.info(" Retrieving " + nextURL); if (albumDoc == null) { - albumDoc = getDocument(nextURL); + albumDoc = Http.url(nextURL).get(); } for (Element thumb : albumDoc.select("a[data-track=photo-click]")) { String imageTitle = null; @@ -259,7 +260,7 @@ public class FlickrRipper extends AlbumRipper { private Document getLargestImagePageDocument(URL url) throws IOException { // Get current page - Document doc = getDocument(url); + Document doc = Http.url(url).get(); // Look for larger image page String largestImagePage = this.url.toExternalForm(); for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) { @@ -277,7 +278,7 @@ public class FlickrRipper extends AlbumRipper { } if (!largestImagePage.equals(this.url.toExternalForm())) { // Found larger image page, get it. 
- doc = getDocument(largestImagePage); + doc = Http.url(largestImagePage).get(); } return doc; } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java index abc51fc7..a1f92075 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java @@ -10,13 +10,13 @@ import java.util.regex.Pattern; import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; public class GifyoRipper extends AlbumRipper { @@ -50,7 +50,9 @@ public class GifyoRipper extends AlbumRipper { logger.info(" Retrieving " + this.url + "(page #" + page + ")"); Response resp = null; if (page == 0) { - resp = getResponse(this.url, true); + resp = Http.url(this.url) + .ignoreContentType() + .response(); cookies = resp.cookies(); } else { @@ -59,13 +61,12 @@ public class GifyoRipper extends AlbumRipper { postData.put("view", "gif"); postData.put("layout", "grid"); postData.put("page", Integer.toString(page)); - resp = Jsoup.connect(this.url.toExternalForm()) - .ignoreContentType(true) - .userAgent(USER_AGENT) + resp = Http.url(this.url) + .ignoreContentType() .data(postData) .cookies(cookies) .method(Method.POST) - .execute(); + .response(); cookies.putAll(resp.cookies()); } Document doc = resp.parse(); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java index eee06858..84ea2936 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java @@ 
-12,6 +12,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; public class GirlsOfDesireRipper extends AlbumRipper { // All sleep times are in milliseconds @@ -41,7 +42,7 @@ public class GirlsOfDesireRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving " + url.toExternalForm()); sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } Elements elems = albumDoc.select(".albumName"); return HOST + "_" + elems.first().text(); @@ -76,7 +77,7 @@ public class GirlsOfDesireRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = getDocument(nextUrl); + albumDoc = Http.url(nextUrl).get(); } // Find thumbnails diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java index faba994f..eb072186 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java @@ -8,9 +8,9 @@ import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; -import org.jsoup.Jsoup; import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class GonewildRipper extends AlbumRipper { @@ -49,7 +49,7 @@ public class GonewildRipper extends AlbumRipper { + "?method=get_user" + "&user=" + username + "&count=" + count; - String gwURL, jsonString, imagePath; + String gwURL, imagePath; JSONArray posts, images; JSONObject json, post, image; while (true) { @@ -57,8 +57,8 @@ public class GonewildRipper extends AlbumRipper { gwURL = baseGwURL + "&start=" + start; start += count; - jsonString = getResponse(gwURL, 
true).body(); - json = new JSONObject(jsonString); + json = Http.url(gwURL) + .getJSON(); if (json.has("error")) { logger.error("Error while retrieving user posts:" + json.getString("error")); break; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java index 9785736a..f98b5e86 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java @@ -14,6 +14,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class HentaifoundryRipper extends AlbumRipper { @@ -40,11 +41,12 @@ public class HentaifoundryRipper extends AlbumRipper { int index = 0; // Get cookies - Response resp = getResponse("http://www.hentai-foundry.com/"); + Response resp = Http.url("http://www.hentai-foundry.com/").response(); Map cookies = resp.cookies(); - resp = getResponse("http://www.hentai-foundry.com/?enterAgree=1&size=1500", - "http://www.hentai-foundry.com/", - cookies); + resp = Http.url("http://www.hentai-foundry.com/?enterAgree=1&size=1500") + .referrer("http://www.hentai-foundry.com/") + .cookies(cookies) + .response(); cookies = resp.cookies(); logger.info("cookies: " + cookies); @@ -54,7 +56,10 @@ public class HentaifoundryRipper extends AlbumRipper { break; } sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - Document doc = getDocument(nextURL, this.url.toExternalForm(), cookies); + Document doc = Http.url(nextURL) + .referrer(this.url) + .cookies(cookies) + .get(); for (Element thumb : doc.select("td > a:first-child")) { if (isStopped()) { break; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java index 
ea94fb22..d9d35375 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagearnRipper.java @@ -11,6 +11,7 @@ import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class ImagearnRipper extends AlbumRipper { @@ -41,7 +42,7 @@ public class ImagearnRipper extends AlbumRipper { } private URL getGalleryFromImage(URL url) throws IOException { - Document doc = getDocument(url); + Document doc = Http.url(url).get(); for (Element link : doc.select("a[href~=^gallery\\.php.*$]")) { logger.info("LINK: " + link.toString()); if (link.hasAttr("href") @@ -59,7 +60,7 @@ public class ImagearnRipper extends AlbumRipper { int index = 0; logger.info("Retrieving " + this.url.toExternalForm()); sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - Document doc = getDocument(this.url); + Document doc = Http.url(this.url).get(); for (Element thumb : doc.select("img.border")) { if (isStopped()) { break; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java index 4cc0c58b..0b0c4fbd 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagebamRipper.java @@ -13,6 +13,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class ImagebamRipper extends AlbumRipper { @@ -47,7 +48,7 @@ public class ImagebamRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving " + url.toExternalForm()); 
sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } Elements elems = albumDoc.select("legend"); String title = elems.first().text(); @@ -94,7 +95,9 @@ public class ImagebamRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = getDocument(nextUrl, this.url.toExternalForm(), null); + albumDoc = Http.url(nextUrl) + .referrer(this.url) + .get(); } // Find thumbnails Elements thumbs = albumDoc.select("div > a[target=_blank]:not(.footera)"); @@ -171,7 +174,7 @@ public class ImagebamRipper extends AlbumRipper { private void fetchImage() { try { - Document doc = getDocument(url); + Document doc = Http.url(url).get(); // Find image Elements images = doc.select("td > img"); if (images.size() == 0) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java index 44db9f5d..c527741c 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagefapRipper.java @@ -12,6 +12,7 @@ import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class ImagefapRipper extends AlbumRipper { @@ -45,7 +46,7 @@ public class ImagefapRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } String title = albumDoc.title(); Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$"); @@ -94,7 +95,7 @@ public class ImagefapRipper extends AlbumRipper { sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); logger.info("Retrieving " + this.url); if (albumDoc == null) { - albumDoc = 
getDocument(this.url); + albumDoc = Http.url(this.url).get(); } while (true) { if (isStopped()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java index 655b1421..1023cb3c 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagestashRipper.java @@ -11,6 +11,7 @@ import org.json.JSONObject; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class ImagestashRipper extends AlbumRipper { @@ -45,9 +46,7 @@ public class ImagestashRipper extends AlbumRipper { String nextURL = baseURL + "&page=" + page; logger.info("[ ] Retrieving " + nextURL); sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - String jsonText = getResponse(nextURL, true).body(); - logger.info(jsonText); - JSONObject json = new JSONObject(jsonText); + JSONObject json = Http.url(nextURL).getJSON(); JSONArray images = json.getJSONArray("images"); for (int i = 0; i < images.length(); i++) { if (isStopped()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java index adb90bc1..749b0dd4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImagevenueRipper.java @@ -13,6 +13,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class ImagevenueRipper extends AlbumRipper { @@ -60,7 +61,7 @@ public class ImagevenueRipper extends AlbumRipper { String nextUrl = 
this.url.toExternalForm(); logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - Document albumDoc = getDocument(nextUrl); + Document albumDoc = Http.url(nextUrl).get(); // Find thumbnails Elements thumbs = albumDoc.select("a[target=_blank]"); if (thumbs.size() == 0) { @@ -115,7 +116,7 @@ public class ImagevenueRipper extends AlbumRipper { private void fetchImage() { try { sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - Document doc = getDocument(this.url); + Document doc = Http.url(this.url).get(); // Find image Elements images = doc.select("a > img"); if (images.size() == 0) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java index 0c1438f1..e7451979 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgboxRipper.java @@ -12,6 +12,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class ImgboxRipper extends AlbumRipper { @@ -36,7 +37,7 @@ public class ImgboxRipper extends AlbumRipper { @Override public void rip() throws IOException { sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); - Document doc = getDocument(this.url); + Document doc = Http.url(this.url).get(); Elements images = doc.select("div.boxed-content > a > img"); if (images.size() == 0) { logger.error("No images found at " + this.url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java index 36464ace..0e963422 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java @@ -19,6 +19,7 @@ import 
org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class ImgurRipper extends AlbumRipper { @@ -83,7 +84,7 @@ public class ImgurRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } String title = albumDoc.title(); if (!title.contains(" - Imgur") @@ -259,7 +260,7 @@ public class ImgurRipper extends AlbumRipper { private void ripUserAccount(URL url) throws IOException { logger.info("Retrieving " + url); sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); - Document doc = getDocument(url); + Document doc = Http.url(url).get(); for (Element album : doc.select("div.cover a")) { stopCheck(); if (!album.hasAttr("href") @@ -289,8 +290,7 @@ public class ImgurRipper extends AlbumRipper { try { page++; String jsonUrlWithParams = jsonUrl + "?sort=0&order=1&album=0&page=" + page + "&perPage=60"; - String jsonString = getResponse(jsonUrlWithParams, true).body(); - JSONObject json = new JSONObject(jsonString); + JSONObject json = Http.url(jsonUrlWithParams).getJSON(); JSONObject jsonData = json.getJSONObject("data"); if (jsonData.has("count")) { imagesTotal = jsonData.getInt("count"); @@ -327,7 +327,7 @@ public class ImgurRipper extends AlbumRipper { } pageURL += "page/" + page + "/miss?scrolled"; logger.info(" Retrieving " + pageURL); - Document doc = getDocument(pageURL); + Document doc = Http.url(pageURL).get(); Elements imgs = doc.select(".post img"); for (Element img : imgs) { String image = img.attr("src"); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java index 5c784bc5..4ef7c5e1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java +++ 
b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java @@ -13,6 +13,7 @@ import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; public class InstagramRipper extends AlbumRipper { @@ -62,7 +63,7 @@ public class InstagramRipper extends AlbumRipper { } private URL getUserPageFromImage(URL url) throws IOException { - Document doc = getDocument(url); + Document doc = Http.url(url).get(); for (Element element : doc.select("meta[property='og:description']")) { String content = element.attr("content"); if (content.endsWith("'s photo on Instagram")) { @@ -75,7 +76,7 @@ public class InstagramRipper extends AlbumRipper { private String getUserID(URL url) throws IOException { logger.info("Retrieving " + url); this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); - Document doc = getDocument(url); + Document doc = Http.url(url).get(); for (Element element : doc.select("input[id=user_public]")) { return element.attr("value"); } @@ -91,8 +92,7 @@ public class InstagramRipper extends AlbumRipper { String url = baseURL + params; this.sendUpdate(STATUS.LOADING_RESOURCE, url); logger.info(" Retrieving " + url); - String jsonString = getResponse(url, true).body(); - JSONObject json = new JSONObject(jsonString); + JSONObject json = Http.url(url).getJSON(); JSONArray datas = json.getJSONArray("data"); String nextMaxID = ""; if (datas.length() == 0) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java index 21aeb063..5096830f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/IrarchivesRipper.java @@ -11,6 +11,7 @@ import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; import 
com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class IrarchivesRipper extends AlbumRipper { @@ -22,11 +23,6 @@ public class IrarchivesRipper extends AlbumRipper { super(url); } - @Override - public int getTimeout() { - return 60 * 1000; - } - @Override public boolean canRip(URL url) { return url.getHost().endsWith(DOMAIN); @@ -48,8 +44,9 @@ public class IrarchivesRipper extends AlbumRipper { @Override public void rip() throws IOException { logger.info(" Retrieving " + this.url); - String jsonString = getResponse(url, true).body(); - JSONObject json = new JSONObject(jsonString); + JSONObject json = Http.url(url) + .timeout(60 * 1000) + .getJSON(); JSONArray posts = json.getJSONArray("posts"); if (posts.length() == 0) { logger.error("No posts found at " + this.url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java index 1de6b612..e801b2ad 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MediacrushRipper.java @@ -18,6 +18,7 @@ import org.json.JSONObject; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class MediacrushRipper extends AlbumRipper { @@ -67,9 +68,9 @@ public class MediacrushRipper extends AlbumRipper { String url = this.url.toExternalForm(); logger.info(" Retrieving " + url); sendUpdate(STATUS.LOADING_RESOURCE, url); - String jsonString = null; + JSONObject json = null; try { - jsonString = getResponse(url, true).body(); + json = Http.url(url).getJSON(); } catch (Exception re) { // Check for >1024 bit encryption but in older versions of Java if (re.getCause().getCause() 
instanceof InvalidAlgorithmParameterException) { @@ -96,7 +97,6 @@ public class MediacrushRipper extends AlbumRipper { } // Convert to JSON - JSONObject json = new JSONObject(jsonString); if (!json.has("files")) { sendUpdate(STATUS.RIP_ERRORED, "No files found at " + url); throw new IOException("Could not find any files at " + url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java index 282655bc..05b91a82 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MinusRipper.java @@ -8,11 +8,11 @@ import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; -import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class MinusRipper extends AlbumRipper { @@ -47,7 +47,7 @@ public class MinusRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } Elements titles = albumDoc.select("meta[property=og:title]"); if (titles.size() > 0) { @@ -129,8 +129,7 @@ public class MinusRipper extends AlbumRipper { + user + "/shares.json/" + page; logger.info(" Retrieving " + jsonUrl); - Response resp = getResponse(jsonUrl, true); - JSONObject json = new JSONObject(resp.body()); + JSONObject json = Http.url(jsonUrl).getJSON(); JSONArray galleries = json.getJSONArray("galleries"); for (int i = 0; i < galleries.length(); i++) { JSONObject gallery = galleries.getJSONObject(i); @@ -151,7 +150,7 @@ public class MinusRipper extends AlbumRipper { private void ripAlbum(URL url, String subdir) throws IOException { logger.info(" Retrieving " + url.toExternalForm()); if (albumDoc == null || !subdir.equals("")) { - 
albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } Pattern p = Pattern.compile("^.*var gallerydata = (\\{.*\\});.*$", Pattern.DOTALL); Matcher m = p.matcher(albumDoc.data()); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java index 2f81c526..62369ce7 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java @@ -12,6 +12,7 @@ import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class MotherlessRipper extends AlbumRipper { @@ -66,7 +67,9 @@ public class MotherlessRipper extends AlbumRipper { } logger.info("Retrieving " + nextURL); sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - Document doc = getDocument(nextURL, "http://motherless.com", null); + Document doc = Http.url(nextURL) + .referrer("http://motherless.com") + .get(); for (Element thumb : doc.select("div.thumb a.img-container")) { if (isStopped()) { break; @@ -118,7 +121,9 @@ public class MotherlessRipper extends AlbumRipper { return; } String u = this.url.toExternalForm(); - Document doc = getDocument(u, u, null); + Document doc = Http.url(u) + .referrer(u) + .get(); Pattern p = Pattern.compile("^.*__fileurl = '([^']{1,})';.*$", Pattern.DOTALL); Matcher m = p.matcher(doc.outerHtml()); if (m.matches()) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java index 1bbb70ca..c7d98c15 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java @@ -15,6 +15,7 @@ import org.jsoup.select.Elements; 
import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class NfsfwRipper extends AlbumRipper { @@ -46,7 +47,7 @@ public class NfsfwRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } String title = albumDoc.select("h2").first().text().trim(); return "nfsfw_" + Utils.filesystemSafe(title); @@ -87,7 +88,7 @@ public class NfsfwRipper extends AlbumRipper { sendUpdate(STATUS.LOADING_RESOURCE, nextURL); logger.info(" Retrieving " + nextURL); if (albumDoc == null) { - albumDoc = getDocument(nextURL); + albumDoc = Http.url(nextURL).get(); } // Subalbums for (Element suba : albumDoc.select("td.IMG > a")) { @@ -156,8 +157,9 @@ public class NfsfwRipper extends AlbumRipper { @Override public void run() { try { - String u = this.url.toExternalForm(); - Document doc = getDocument(u, u, null); + Document doc = Http.url(this.url) + .referrer(this.url) + .get(); Elements images = doc.select(".gbBlock img"); if (images.size() == 0) { logger.error("Failed to find image at " + this.url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java index f236df0a..ce425943 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java @@ -16,6 +16,7 @@ import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.utils.Http; public class PhotobucketRipper extends AlbumRipper { @@ -49,7 +50,7 @@ public class PhotobucketRipper extends AlbumRipper { try { // Attempt to use album title as GID if (pageResponse == null) { - 
pageResponse = getResponse(url); + pageResponse = Http.url(url).response(); } Document albumDoc = pageResponse.parse(); Elements els = albumDoc.select("div.libraryTitle > h1"); @@ -131,7 +132,7 @@ public class PhotobucketRipper extends AlbumRipper { if (pageIndex > 1 || pageResponse == null) { url = theUrl + String.format("?sort=3&page=", pageIndex); logger.info(" Retrieving " + url); - pageResponse = getResponse(url); + pageResponse = Http.url(url).response(); } Document albumDoc = pageResponse.parse(); // Retrieve JSON from request @@ -189,9 +190,7 @@ public class PhotobucketRipper extends AlbumRipper { + "&json=1"; try { logger.info("Loading " + apiUrl); - Document doc = getDocument(apiUrl, true); - String jsonString = doc.body().html().replace("&quot;", "\""); - JSONObject json = new JSONObject(jsonString); + JSONObject json = Http.url(apiUrl).getJSON(); JSONArray subalbums = json.getJSONObject("body").getJSONArray("subAlbums"); for (int i = 0; i < subalbums.length(); i++) { String suburl = diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java index a228a4ee..997d3cb9 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java @@ -7,7 +7,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -15,12 +14,12 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class PornhubRipper extends AlbumRipper { // All sleep times are in milliseconds private static final int IMAGE_SLEEP_TIME = 1 * 1000; - 
private static final int TIMEOUT = 5 * 1000; private static final String DOMAIN = "pornhub.com", HOST = "Pornhub"; @@ -49,7 +48,7 @@ public class PornhubRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving " + url.toExternalForm()); sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } Elements elems = albumDoc.select(".photoAlbumTitleV2"); return HOST + "_" + elems.get(0).text(); @@ -89,7 +88,9 @@ public class PornhubRipper extends AlbumRipper { if (albumDoc == null) { logger.info(" Retrieving album page " + nextUrl); sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = getDocument(nextUrl, this.url.toExternalForm(), null); + albumDoc = Http.url(nextUrl) + .referrer(this.url) + .get(); } // Find thumbnails @@ -146,8 +147,9 @@ public class PornhubRipper extends AlbumRipper { private void fetchImage() { try { - String u = this.url.toExternalForm(); - Document doc = getDocument(u, u, null); + Document doc = Http.url(this.url) + .referrer(this.url) + .get(); // Find image Elements images = doc.select("#photoImageSection img"); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java index b36d115b..4cad6885 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java @@ -11,9 +11,9 @@ import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; import org.json.JSONTokener; -import org.jsoup.nodes.Document; import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.RipUtils; import com.rarchives.ripme.utils.Utils; @@ -113,27 +113,25 @@ public class RedditRipper extends AlbumRipper { lastRequestTime = System.currentTimeMillis(); int attempts = 0; - Document doc = null; logger.info(" Retrieving " + url); - 
while(doc == null && attempts++ < 3) { + JSONObject json = null; + while(json == null && attempts++ < 3) { try { - doc = getResponse(url, true).parse(); + json = Http.url(url).getJSON(); } catch(SocketTimeoutException ex) { if(attempts >= 3) throw ex; logger.warn(String.format("[!] Connection timed out (attempt %d)", attempts)); } } - String jsonString = doc.body().html().replaceAll("&quot;", "\""); - - Object jsonObj = new JSONTokener(jsonString).nextValue(); + Object jsonObj = new JSONTokener(json.toString()).nextValue(); JSONArray jsonArray = new JSONArray(); if (jsonObj instanceof JSONObject) { jsonArray.put( (JSONObject) jsonObj); } else if (jsonObj instanceof JSONArray){ jsonArray = (JSONArray) jsonObj; } else { - logger.warn("[!] Unable to parse child: " + jsonString); + logger.warn("[!] Unable to parse child: " + json.toString()); } return jsonArray; } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SeeniveRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SeeniveRipper.java index 98340c6a..db73ad37 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/SeeniveRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SeeniveRipper.java @@ -7,7 +7,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.json.JSONObject; -import org.jsoup.Connection.Method; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -15,6 +14,7 @@ import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; public class SeeniveRipper extends AlbumRipper { @@ -42,7 +42,9 @@ public class SeeniveRipper extends AlbumRipper { public void rip() throws IOException { String baseURL = this.url.toExternalForm(); logger.info(" Retrieving " + baseURL); - Document doc = getDocument(baseURL, baseURL, null); + Document doc = 
Http.url(baseURL) + .referrer(baseURL) + .get(); while (true) { if (isStopped()) { break; @@ -70,8 +72,9 @@ public class SeeniveRipper extends AlbumRipper { } logger.info("[ ] Retrieving " + baseURL + "/next/" + lastID); - String jsonString = getResponse(baseURL + "/next/" + lastID, Method.GET, USER_AGENT, baseURL, null, true).body(); - JSONObject json = new JSONObject(jsonString); + JSONObject json = Http.url(baseURL + "/next/" + lastID) + .referrer(baseURL) + .getJSON(); String html = json.getString("Html"); if (html.equals("")) { break; @@ -111,7 +114,7 @@ public class SeeniveRipper extends AlbumRipper { @Override public void run() { try { - Document doc = getDocument(this.url); + Document doc = Http.url(this.url).get(); logger.info("[ ] Retreiving video page " + this.url); sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); for (Element element : doc.select("source")) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java index a9673a78..920ce07b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SmuttyRipper.java @@ -11,6 +11,7 @@ import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; public class SmuttyRipper extends AlbumRipper { @@ -46,7 +47,9 @@ public class SmuttyRipper extends AlbumRipper { logger.info(" Retrieving " + url); Document doc; try { - doc = getResponse(url, true).parse(); + doc = Http.url(url) + .ignoreContentType() + .get(); } catch (IOException e) { if (e.toString().contains("Status=404")) { logger.info("No more pages to load"); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SupertangasRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SupertangasRipper.java index a6475132..86e680aa 100644 --- 
a/src/main/java/com/rarchives/ripme/ripper/rippers/SupertangasRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SupertangasRipper.java @@ -13,6 +13,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; public class SupertangasRipper extends AlbumRipper { @@ -47,7 +48,7 @@ public class SupertangasRipper extends AlbumRipper { try { logger.info(" Retrieving " + theURL); sendUpdate(STATUS.LOADING_RESOURCE, theURL); - doc = getDocument(theURL); + doc = Http.url(theURL).get(); } catch (HttpStatusException e) { logger.debug("Hit end of pages at page " + page, e); break; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java index 84959966..f3fa3826 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java @@ -12,6 +12,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class TeenplanetRipper extends AlbumRipper { @@ -38,7 +39,7 @@ public class TeenplanetRipper extends AlbumRipper { try { // Attempt to use album title as GID if (albumDoc == null) { - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } Elements elems = albumDoc.select("div.header > h2"); return HOST + "_" + elems.get(0).text(); @@ -71,7 +72,7 @@ public class TeenplanetRipper extends AlbumRipper { logger.info("Retrieving " + this.url); sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); if (albumDoc == null) { - albumDoc = getDocument(url); + albumDoc = Http.url(url).get(); } for (Element thumb : albumDoc.select("#galleryImages > a > img")) { if (!thumb.hasAttr("src")) 
{ diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java index a379f367..e3654219 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java @@ -9,10 +9,10 @@ import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import org.json.JSONArray; import org.json.JSONObject; -import org.jsoup.nodes.Document; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class TumblrRipper extends AlbumRipper { @@ -66,9 +66,8 @@ public class TumblrRipper extends AlbumRipper { checkURL += url.getHost(); checkURL += "/info?api_key=" + API_KEY; try { - Document doc = getResponse(checkURL, true).parse(); - String jsonString = doc.body().html().replaceAll("&quot;", "\""); - JSONObject json = new JSONObject(jsonString); + JSONObject json = Http.url(checkURL) + .getJSON(); int status = json.getJSONObject("meta").getInt("status"); return status == 200; } catch (IOException e) { @@ -98,15 +97,14 @@ public class TumblrRipper extends AlbumRipper { String apiURL = getTumblrApiURL(mediaType, offset); logger.info("Retrieving " + apiURL); sendUpdate(STATUS.LOADING_RESOURCE, apiURL); - Document doc = getResponse(apiURL, true).parse(); + JSONObject json = Http.url(apiURL).getJSON(); try { Thread.sleep(1000); } catch (InterruptedException e) { logger.error("[!] Interrupted while waiting to load next album:", e); break; } - String jsonString = doc.body().html().replaceAll("&quot;", "\""); - if (!handleJSON(jsonString)) { + if (!handleJSON(json)) { // Returns false if an error occurs and we should stop. 
break; } @@ -119,12 +117,7 @@ public class TumblrRipper extends AlbumRipper { waitForThreads(); } - private boolean handleJSON(String jsonString) { - JSONObject json = new JSONObject(jsonString); - if (json == null || !json.has("response")) { - logger.error("[!] JSON response from tumblr was invalid: " + jsonString); - return false; - } + private boolean handleJSON(JSONObject json) { JSONArray posts, photos; JSONObject post, photo; URL fileURL; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java index e18f7fc3..7b12409e 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java @@ -12,10 +12,10 @@ import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.json.JSONTokener; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class TwitterRipper extends AlbumRipper { @@ -71,13 +71,13 @@ public class TwitterRipper extends AlbumRipper { } private void getAccessToken() throws IOException { - Document doc = Jsoup.connect("https://api.twitter.com/oauth2/token") - .ignoreContentType(true) - .header("Authorization", "Basic " + authKey) - .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") - .header("User-agent", "ripe and zipe") - .data("grant_type", "client_credentials") - .post(); + Document doc = Http.url("https://api.twitter.com/oauth2/token") + .ignoreContentType() + .header("Authorization", "Basic " + authKey) + .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") + .header("User-agent", "ripe and zipe") + .data("grant_type", "client_credentials") + .post(); String body = doc.body().html().replaceAll("&quot;", "\""); try { JSONObject json = new JSONObject(body); 
@@ -90,8 +90,8 @@ public class TwitterRipper extends AlbumRipper { } private void checkRateLimits(String resource, String api) throws IOException { - Document doc = Jsoup.connect("https://api.twitter.com/1.1/application/rate_limit_status.json?resources=" + resource) - .ignoreContentType(true) + Document doc = Http.url("https://api.twitter.com/1.1/application/rate_limit_status.json?resources=" + resource) + .ignoreContentType() .header("Authorization", "Bearer " + accessToken) .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") .header("User-agent", "ripe and zipe") @@ -143,12 +143,12 @@ public class TwitterRipper extends AlbumRipper { private List getTweets(String url) throws IOException { List tweets = new ArrayList(); logger.info(" Retrieving " + url); - Document doc = Jsoup.connect(url) - .ignoreContentType(true) - .header("Authorization", "Bearer " + accessToken) - .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") - .header("User-agent", "ripe and zipe") - .get(); + Document doc = Http.url(url) + .ignoreContentType() + .header("Authorization", "Bearer " + accessToken) + .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") + .header("User-agent", "ripe and zipe") + .get(); String body = doc.body().html().replaceAll("&quot;", "\""); Object jsonObj = new JSONTokener(body).nextValue(); JSONArray statuses; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VidbleRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VidbleRipper.java index 6479b31c..b6402d7b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/VidbleRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VidbleRipper.java @@ -11,6 +11,7 @@ import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class VidbleRipper extends AlbumRipper 
{ @@ -49,7 +50,7 @@ public class VidbleRipper extends AlbumRipper { logger.info("Retrieving " + this.url); sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); if (albumDoc == null) { - albumDoc = getDocument(this.url); + albumDoc = Http.url(this.url).get(); } Elements els = albumDoc.select("#ContentPlaceHolder1_thumbs"); if (els.size() == 0) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VineRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VineRipper.java index 0673c86f..aa453e1a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/VineRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VineRipper.java @@ -9,10 +9,10 @@ import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.HttpStatusException; -import org.jsoup.nodes.Document; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; public class VineRipper extends AlbumRipper { @@ -37,7 +37,7 @@ public class VineRipper extends AlbumRipper { public void rip() throws IOException { int page = 0; String baseURL = "https://vine.co/api/timelines/users/" + getGID(this.url); - Document doc; + JSONObject json = null; while (true) { page++; String theURL = baseURL; @@ -47,14 +47,11 @@ public class VineRipper extends AlbumRipper { try { logger.info(" Retrieving " + theURL); sendUpdate(STATUS.LOADING_RESOURCE, theURL); - doc = getResponse(theURL, true).parse(); + json = Http.url(theURL).getJSON(); } catch (HttpStatusException e) { logger.debug("Hit end of pages at page " + page, e); break; } - String jsonString = doc.body().html(); - jsonString = jsonString.replace("&quot;", "\""); - JSONObject json = new JSONObject(jsonString); JSONArray records = json.getJSONObject("data").getJSONArray("records"); for (int i = 0; i < records.length(); i++) { String videoURL = records.getJSONObject(i).getString("videoUrl"); diff --git 
a/src/main/java/com/rarchives/ripme/ripper/rippers/VineboxRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VineboxRipper.java index 2d5776b5..e04cac14 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/VineboxRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VineboxRipper.java @@ -12,6 +12,7 @@ import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Http; public class VineboxRipper extends AlbumRipper { @@ -42,7 +43,7 @@ public class VineboxRipper extends AlbumRipper { logger.info("Retrieving " + urlPaged); sendUpdate(STATUS.LOADING_RESOURCE, urlPaged); try { - doc = getDocument(this.url);; + doc = Http.url(this.url).get(); } catch (HttpStatusException e) { logger.debug("Hit end of pages at page " + page, e); break; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java index e3777e6f..724acef8 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java @@ -18,6 +18,7 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; public class VkRipper extends AlbumRipper { @@ -66,13 +67,11 @@ public class VkRipper extends AlbumRipper { postData.put("act", "load_videos_silent"); postData.put("offset", "0"); postData.put("oid", oid); - Document doc = Jsoup.connect(u) - .header("Referer", this.url.toExternalForm()) - .ignoreContentType(true) - .userAgent(USER_AGENT) - .timeout(5000) - .data(postData) - .post(); + Document doc = Http.url(u) + .referrer(this.url) + .ignoreContentType() + .data(postData) + .post(); String[] jsonStrings = doc.toString().split(""); JSONObject json = new 
JSONObject(jsonStrings[jsonStrings.length - 1]); JSONArray videos = json.getJSONArray("all"); @@ -108,13 +107,11 @@ public class VkRipper extends AlbumRipper { postData.put("al", "1"); postData.put("offset", Integer.toString(offset)); postData.put("part", "1"); - Document doc = Jsoup.connect(this.url.toExternalForm()) - .header("Referer", this.url.toExternalForm()) - .ignoreContentType(true) - .userAgent(USER_AGENT) - .timeout(5000) - .data(postData) - .post(); + Document doc = Http.url(this.url) + .referrer(this.url) + .ignoreContentType() + .data(postData) + .post(); String body = doc.toString(); if (!body.contains(" cookies) { + connection.cookies(cookies); + return this; + } + public Http data(Map data) { + connection.data(data); + return this; + } + public Http data(String name, String value) { + Map data = new HashMap(); + data.put(name, value); + return data(data); + } + public Http method(Method method) { + connection.method(method); + return this; + } + + // Getters + public Document get() throws IOException { + connection.method(Method.GET); + return response().parse(); + } + + public Document post() throws IOException { + connection.method(Method.POST); + return response().parse(); + } + + public JSONObject getJSON() throws IOException { + ignoreContentType(); + String jsonString = response().body().replace("&quot;", "\""); + return new JSONObject(jsonString); + } + + public Response response() throws IOException { + Response response = null; + int retries = this.retries; + while (--retries >= 0) { + try { + response = connection.execute(); + return response; + } catch (IOException e) { + logger.warn("Error while loading " + url, e); + continue; + } + } + throw new IOException("Failed to load " + url + " after " + this.retries + " attempts"); + } +} diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java index cea5d5bf..f6c34556 100644 --- 
a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java @@ -8,13 +8,6 @@ import java.util.List; import com.rarchives.ripme.ripper.rippers.DeviantartRipper; public class DeviantartRipperTest extends RippersTest { - - public void testAlphaSorting() { - String[] strings = new String[]{"a", "aa", "aaa", "d6hg2dz", "d6fspba", "d6fcvvr"}; - for (String string : strings) { - System.err.println(string + ": " + DeviantartRipper.alphaToLong(string)); - } - } public void testDeviantartAlbums() throws IOException { if (!DOWNLOAD_CONTENT) {