From e904a7ee9798cd77515d9edf1768f05817a8ad13 Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Sun, 6 Apr 2014 12:07:53 -0700
Subject: [PATCH] Added support for imgur.com/r/subreddit albums

---
Reviewer notes (this section is not part of the commit message):
- ripSubreddit(): restore the thread's interrupt flag when the inter-page
  sleep is interrupted, and log "next page" instead of "next album".
- ripSubreddit(): added Javadoc and explanatory comments; hunk offsets and
  the diffstat below were adjusted for the extra lines.
- Line breaks, indentation, blank context lines and the pom.xml element
  tags were reconstructed from a flattened copy of this patch. Verify them
  against the tree, or apply with `git apply --ignore-whitespace`.
- The post-image blob id on the ImgurRipper.java "index" line predates the
  ripSubreddit() edits described above.

 pom.xml                                       |  2 +-
 .../ripme/ripper/rippers/ImgurRipper.java     | 75 ++++++++++++++++++-
 .../tst/ripper/rippers/ImgurRipperTest.java   |  6 ++
 3 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/pom.xml b/pom.xml
index 0e0229f6..cb8ca1f0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,6 +4,6 @@
   <groupId>com.rarchives.ripme</groupId>
   <artifactId>ripme</artifactId>
   <packaging>jar</packaging>
-  <version>1.0.4</version>
+  <version>1.0.6</version>
   <name>ripme</name>
   <url>http://rip.rarchives.com</url>
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
index fe2fcce1..f90b7de9 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
@@ -15,6 +15,7 @@ import org.json.JSONObject;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
 
 import com.rarchives.ripme.ripper.AbstractRipper;
 import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
@@ -31,7 +32,8 @@ public class ImgurRipper extends AbstractRipper {
         ALBUM,
         USER,
         USER_ALBUM,
-        SERIES_OF_IMAGES
+        SERIES_OF_IMAGES,
+        SUBREDDIT
     };
     private ALBUM_TYPE albumType;
 
@@ -85,6 +87,9 @@ public class ImgurRipper extends AbstractRipper {
             // TODO Get all albums by user
             ripUserAccount(url);
             break;
+        case SUBREDDIT:
+            ripSubreddit(url);
+            break;
         }
         waitForThreads();
     }
@@ -211,6 +216,57 @@ public class ImgurRipper extends AbstractRipper {
             }
         }
     }
+
+    /**
+     * Rips every image listed on an imgur subreddit page (imgur.com/r/...).
+     * Requests "page/N/miss?scrolled" under the given URL for N = 0, 1, 2, ...
+     * and queues each ".post img" source for download until a page contains
+     * no images or the waiting thread is interrupted.
+     *
+     * @param url subreddit URL to rip
+     * @throws IOException if a page cannot be fetched or an image URL is malformed
+     */
+    private void ripSubreddit(URL url) throws IOException {
+        int page = 0;
+        while (true) {
+            String pageURL = url.toExternalForm();
+            if (!pageURL.endsWith("/")) {
+                pageURL += "/";
+            }
+            pageURL += "page/" + page + "/miss?scrolled";
+            logger.info(" Retrieving " + pageURL);
+            Document doc = Jsoup.connect(pageURL)
+                                .userAgent(USER_AGENT)
+                                .get();
+            Elements imgs = doc.select(".post img");
+            for (Element img : imgs) {
+                String image = img.attr("src");
+                // Protocol-relative source: give it an explicit scheme.
+                if (image.startsWith("//")) {
+                    image = "http:" + image;
+                }
+                // NOTE(review): presumably drops imgur's "b" thumbnail suffix; hits every "b." - confirm.
+                if (image.contains("b.")) {
+                    image = image.replace("b.", ".");
+                }
+                URL imageURL = new URL(image);
+                addURLToDownload(imageURL);
+            }
+            // Stop once a page yields no images.
+            if (imgs.size() == 0) {
+                break;
+            }
+            page++;
+            try {
+                Thread.sleep(1000);
+            } catch (InterruptedException e) {
+                logger.error("Interrupted while waiting to load next page: ", e);
+                // Restore the interrupt flag so callers can observe the interruption.
+                Thread.currentThread().interrupt();
+                break;
+            }
+        }
+    }
 
     @Override
     public String getHost() {
@@ -233,8 +289,8 @@ public class ImgurRipper extends AbstractRipper {
         if (m.matches()) {
             // Root imgur account
             String gid = m.group(1);
-            if (gid.equals("i")) {
-                throw new MalformedURLException("Ripping i.imgur.com links not supported");
+            if (gid.equals("www")) {
+                throw new MalformedURLException("Cannot rip the www.imgur.com homepage");
             }
             albumType = ALBUM_TYPE.USER;
             return gid;
@@ -246,6 +302,19 @@ public class ImgurRipper extends AbstractRipper {
             albumType = ALBUM_TYPE.USER_ALBUM;
             return m.group();
         }
+        p = Pattern.compile("^https?://(www\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            // Imgur subreddit aggregator
+            albumType = ALBUM_TYPE.SUBREDDIT;
+            String album = m.group(2);
+            for (int i = 3; i <= m.groupCount(); i++) {
+                if (m.group(i) != null) {
+                    album += "_" + m.group(i).replace("/", "");
+                }
+            }
+            return album;
+        }
         p = Pattern.compile("^https?://(i\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
         m = p.matcher(url.toExternalForm());
         if (m.matches()) {
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java
index 6f2cb276..f64028c1 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java
@@ -42,11 +42,16 @@ public class ImgurRipperTest extends RippersTest {
         passURLs.add(new URL("http://imgur.com/YOdjht3,x5VxH9G,5juXjJ2"));
         passURLs.add(new URL("http://markedone911.imgur.com"));
         passURLs.add(new URL("http://markedone911.imgur.com/"));
+        passURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
+        passURLs.add(new URL("http://imgur.com/r/nsfw_oc/top"));
+        passURLs.add(new URL("http://imgur.com/r/nsfw_oc/new"));
+        passURLs.add(new URL("http://imgur.com/r/nsfw_oc"));
 
         for (URL url : passURLs) {
             try {
                 ImgurRipper ripper = new ImgurRipper(url);
                 assert(ripper.canRip(url));
+                System.err.println(ripper.getWorkingDir());
                 deleteDir(ripper.getWorkingDir());
             } catch (Exception e) {
                 fail("Failed to instantiate ripper for " + url);
@@ -66,6 +71,7 @@ public class ImgurRipperTest extends RippersTest {
         contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/vertical#0"));
         contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
         contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
+        contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
         for (URL url : contentURLs) {
             try {
                 ImgurRipper ripper = new ImgurRipper(url);