From b9e3d774490ccc92627d478bac789f311d551c8f Mon Sep 17 00:00:00 2001 From: 4pr0n Date: Sun, 22 Jun 2014 19:17:40 -0700 Subject: [PATCH] Removed SinglePage ripper --- .../ripme/ripper/AbstractHTMLRipper.java | 7 +- .../ripme/ripper/AbstractJSONRipper.java | 3 + .../ripper/AbstractSinglePageRipper.java | 70 ------------------- .../ripper/rippers/ButttoucherRipper.java | 4 +- .../ripme/ripper/rippers/ChanRipper.java | 4 +- .../ripper/rippers/EightmusesRipper.java | 4 +- .../ripper/rippers/GirlsOfDesireRipper.java | 51 ++------------ 7 files changed, 20 insertions(+), 123 deletions(-) delete mode 100644 src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index ddd00cc2..7dbba6d6 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -10,6 +10,9 @@ import org.jsoup.nodes.Document; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Utils; +/** + * Simplified ripper, designed for ripping from sites by parsing HTML. + */ public abstract class AbstractHTMLRipper extends AlbumRipper { public AbstractHTMLRipper(URL url) throws IOException { @@ -20,7 +23,9 @@ public abstract class AbstractHTMLRipper extends AlbumRipper { public abstract String getHost(); public abstract Document getFirstPage() throws IOException; - public abstract Document getNextPage(Document doc) throws IOException; + public Document getNextPage(Document doc) throws IOException { + throw new IOException("getNextPage not implemented"); + } public abstract List getURLsFromPage(Document page); public abstract void downloadURL(URL url, int index); public DownloadThreadPool getThreadPool() { diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java index 0641e61a..fea5c49e 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java @@ -10,6 +10,9 @@ import org.json.JSONObject; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Utils; +/** + * Simplified ripper, designed for ripping from sites by parsing JSON. + */ public abstract class AbstractJSONRipper extends AlbumRipper { public AbstractJSONRipper(URL url) throws IOException { diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java deleted file mode 100644 index 37319661..00000000 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java +++ /dev/null @@ -1,70 +0,0 @@ -package com.rarchives.ripme.ripper; - -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.List; - -import org.jsoup.nodes.Document; - -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; -import com.rarchives.ripme.utils.Utils; - -public abstract class AbstractSinglePageRipper extends AlbumRipper { - - public AbstractSinglePageRipper(URL url) throws IOException { - super(url); - } - - public abstract String getDomain(); - public abstract String getHost(); - - public abstract Document getFirstPage() throws IOException; - public abstract List getURLsFromPage(Document page); - public abstract void downloadURL(URL url, int index); - - public boolean keepSortOrder() { - return true; - } - - @Override - public boolean canRip(URL url) { - return url.getHost().endsWith(getDomain()); - } - - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - - @Override - public void rip() throws IOException { - int index = 0; - logger.info("Retrieving " + this.url); - sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); - Document doc = getFirstPage(); - List imageURLs = getURLsFromPage(doc); - - if (imageURLs.size() == 0) { - throw new IOException("No images found at " + this.url); - } - - for (String imageURL : imageURLs) { - if (isStopped()) { - logger.info("Interrupted"); - break; - } - index += 1; - downloadURL(new URL(imageURL), index); - } - waitForThreads(); - } - - public String getPrefix(int index) { - String prefix = ""; - if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - return prefix; - } -} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java index ee2f699a..ef78ca17 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ButttoucherRipper.java @@ -11,10 +11,10 @@ import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AbstractSinglePageRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -public class ButttoucherRipper extends AbstractSinglePageRipper { +public class ButttoucherRipper extends AbstractHTMLRipper { public ButttoucherRipper(URL url) throws IOException { super(url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java index 03c32d1b..142e9973 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java @@ -11,10 +11,10 @@ import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AbstractSinglePageRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -public class ChanRipper extends AbstractSinglePageRipper { +public class ChanRipper extends AbstractHTMLRipper { public ChanRipper(URL url) throws IOException { super(url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index b766442e..6b64b03e 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -16,11 +16,11 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AbstractSinglePageRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; -public class EightmusesRipper extends AbstractSinglePageRipper { +public class EightmusesRipper extends AbstractHTMLRipper { private Document albumDoc = null; private Map cookies = new HashMap(); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java index f5ffd6c2..a7130ed3 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java @@ -12,14 +12,10 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AbstractSinglePageRipper; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -public class GirlsOfDesireRipper extends AbstractSinglePageRipper { - // All sleep times are in milliseconds - private static final int IMAGE_SLEEP_TIME = 100; - +public class GirlsOfDesireRipper extends AbstractHTMLRipper { // Current HTML document private Document albumDoc = null; @@ -65,7 +61,7 @@ public class GirlsOfDesireRipper extends AbstractSinglePageRipper { + "http://www.girlsofdesire.org/galleries//" + " Got: " + url); } - + @Override public Document getFirstPage() throws IOException { if (albumDoc == null) { @@ -90,44 +86,7 @@ public class GirlsOfDesireRipper extends AbstractSinglePageRipper { @Override public void downloadURL(URL url, int index) { - addURLToDownload(url, getPrefix(index)); - } - - @Override - public void rip() throws IOException { - String nextUrl = this.url.toExternalForm(); - - if (albumDoc == null) { - logger.info(" Retrieving album page " + nextUrl); - sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = Http.url(nextUrl).get(); - } - - // Find thumbnails - Elements thumbs = albumDoc.select("td.vtop > a > img"); - if (thumbs.size() == 0) { - logger.info("No images found at " + nextUrl); - } - - // Iterate over images on page - for (Element thumb : thumbs) { - if (isStopped()) { - break; - } - // Convert thumbnail to full-size image - String imgSrc = thumb.attr("src"); - imgSrc = imgSrc.replaceAll("_thumb\\.", "."); - URL imgUrl = new URL(url, imgSrc); - - addURLToDownload(imgUrl, "", "", this.url.toExternalForm(), null); - - try { - Thread.sleep(IMAGE_SLEEP_TIME); - } catch (InterruptedException e) { - logger.warn("Interrupted while waiting to load next image", e); - } - } - - waitForThreads(); + // Send referrer when downloading images + addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), null); } } \ No newline at end of file