From 0f61dd00f0f88d12da327d946c8d675e3a65dfbf Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 21 Mar 2018 23:59:19 -0400 Subject: [PATCH 1/2] Reddit ripper now gets erome links --- .../ripme/ripper/rippers/RedditRipper.java | 6 +++ .../com/rarchives/ripme/utils/RipUtils.java | 40 +++++++++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java index 52e9a6d2..e8798476 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/RedditRipper.java @@ -4,10 +4,13 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ripper.AbstractRipper; +import com.rarchives.ripme.ripper.rippers.video.GfycatRipper; import org.json.JSONArray; import org.json.JSONObject; import org.json.JSONTokener; @@ -17,6 +20,9 @@ import com.rarchives.ripme.ui.UpdateUtils; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.RipUtils; import com.rarchives.ripme.utils.Utils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; public class RedditRipper extends AlbumRipper { diff --git a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java index b7b8c239..01d20e7c 100644 --- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java +++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java @@ -9,19 +9,18 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ripper.AbstractRipper; +import com.rarchives.ripme.ripper.rippers.EroShareRipper; +import com.rarchives.ripme.ripper.rippers.EromeRipper; 
+import com.rarchives.ripme.ripper.rippers.ImgurRipper; +import com.rarchives.ripme.ripper.rippers.VidbleRipper; +import com.rarchives.ripme.ripper.rippers.video.GfycatRipper; import org.apache.commons.lang.math.NumberUtils; import org.apache.log4j.Logger; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AbstractRipper; -import com.rarchives.ripme.ripper.rippers.ImgurRipper; -import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; -import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurImage; -import com.rarchives.ripme.ripper.rippers.VidbleRipper; -import com.rarchives.ripme.ripper.rippers.video.GfycatRipper; -import com.rarchives.ripme.ripper.rippers.EroShareRipper; public class RipUtils { private static final Logger logger = Logger.getLogger(RipUtils.class); @@ -35,8 +34,8 @@ public class RipUtils { && url.toExternalForm().contains("imgur.com/a/")) { try { logger.debug("Fetching imgur album at " + url); - ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url); - for (ImgurImage imgurImage : imgurAlbum.images) { + ImgurRipper.ImgurAlbum imgurAlbum = ImgurRipper.getImgurAlbum(url); + for (ImgurRipper.ImgurImage imgurImage : imgurAlbum.images) { logger.debug("Got imgur image: " + imgurImage.url); result.add(imgurImage.url); } @@ -49,8 +48,8 @@ public class RipUtils { // Imgur image series. 
try { logger.debug("Fetching imgur series at " + url); - ImgurAlbum imgurAlbum = ImgurRipper.getImgurSeries(url); - for (ImgurImage imgurImage : imgurAlbum.images) { + ImgurRipper.ImgurAlbum imgurAlbum = ImgurRipper.getImgurSeries(url); + for (ImgurRipper.ImgurImage imgurImage : imgurAlbum.images) { logger.debug("Got imgur image: " + imgurImage.url); result.add(imgurImage.url); } @@ -91,6 +90,21 @@ public class RipUtils { return result; } + else if (url.toExternalForm().contains("erome.com")) { + try { + logger.info("Getting eroshare album " + url); + EromeRipper r = new EromeRipper(url); + Document tempDoc = r.getFirstPage(); + for (String u : r.getURLsFromPage(tempDoc)) { + result.add(new URL(u)); + } + } catch (IOException e) { + // Best-effort: log the failure and return whatever URLs were collected so far + logger.warn("Exception while retrieving eroshare page:", e); + } + return result; + } + Pattern p = Pattern.compile("https?://i.reddituploads.com/([a-zA-Z0-9]+)\\?.*"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { @@ -122,8 +136,8 @@ public class RipUtils { try { // Fetch the page Document doc = Jsoup.connect(url.toExternalForm()) - .userAgent(AbstractRipper.USER_AGENT) - .get(); + .userAgent(AbstractRipper.USER_AGENT) + .get(); for (Element el : doc.select("meta")) { if (el.attr("name").equals("twitter:image:src")) { result.add(new URL(el.attr("content"))); From 06e566ac63ff1205ed84379f3aa1628c9ceaa7da Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 22 Mar 2018 00:01:51 -0400 Subject: [PATCH 2/2] Revert "Hentaifoundry Ripper no longer errors out when there is no next page" This reverts commit acb6356b946b433db563c2fda9ee6808a6b64f6f. 
--- .../rarchives/ripme/ripper/rippers/HentaifoundryRipper.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java index c0031548..561c4249 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HentaifoundryRipper.java @@ -68,7 +68,8 @@ public class HentaifoundryRipper extends AbstractHTMLRipper { @Override public Document getNextPage(Document doc) throws IOException { - if (doc.select("li.next > a").size() == 0) { + if (doc.select("li.next.hidden").size() != 0) { + // Last page throw new IOException("No more pages"); } Elements els = doc.select("li.next > a");