Added support for imgur.com/r/subreddit albums

This commit is contained in:
4pr0n 2014-04-06 12:07:53 -07:00
parent 0863a9328c
commit e904a7ee97
3 changed files with 65 additions and 4 deletions

View File

@ -4,7 +4,7 @@
<groupId>com.rarchives.ripme</groupId> <groupId>com.rarchives.ripme</groupId>
<artifactId>ripme</artifactId> <artifactId>ripme</artifactId>
<packaging>jar</packaging> <packaging>jar</packaging>
<version>1.0.4</version> <version>1.0.6</version>
<name>ripme</name> <name>ripme</name>
<url>http://rip.rarchives.com</url> <url>http://rip.rarchives.com</url>
<properties> <properties>

View File

@ -15,6 +15,7 @@ import org.json.JSONObject;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractRipper; import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
@ -31,7 +32,8 @@ public class ImgurRipper extends AbstractRipper {
ALBUM, ALBUM,
USER, USER,
USER_ALBUM, USER_ALBUM,
SERIES_OF_IMAGES SERIES_OF_IMAGES,
SUBREDDIT
}; };
private ALBUM_TYPE albumType; private ALBUM_TYPE albumType;
@ -85,6 +87,9 @@ public class ImgurRipper extends AbstractRipper {
// TODO Get all albums by user // TODO Get all albums by user
ripUserAccount(url); ripUserAccount(url);
break; break;
case SUBREDDIT:
ripSubreddit(url);
break;
} }
waitForThreads(); waitForThreads();
} }
@ -211,6 +216,43 @@ public class ImgurRipper extends AbstractRipper {
} }
} }
} }
/**
 * Rips every image listed on an imgur subreddit aggregator page
 * (e.g. {@code http://imgur.com/r/subreddit/top/all}).
 *
 * Pages are requested one at a time, starting at page 0, until a page
 * contains no {@code .post img} elements. Each thumbnail URL found is
 * converted to a full-size image URL and queued via
 * {@code addURLToDownload}.
 *
 * @param url the subreddit aggregator URL, with or without a trailing slash
 * @throws IOException if a page cannot be retrieved or an image URL is malformed
 */
private void ripSubreddit(URL url) throws IOException {
    // The base URL does not change between pages, so build it once.
    String baseURL = url.toExternalForm();
    if (!baseURL.endsWith("/")) {
        baseURL += "/";
    }
    int page = 0;
    while (true) {
        String pageURL = baseURL + "page/" + page + "/miss?scrolled";
        logger.info(" Retrieving " + pageURL);
        Document doc = Jsoup.connect(pageURL)
                            .userAgent(USER_AGENT)
                            .get();
        Elements imgs = doc.select(".post img");
        if (imgs.isEmpty()) {
            // No more posts: we have walked past the last page.
            break;
        }
        for (Element img : imgs) {
            String image = img.attr("src");
            if (image.isEmpty()) {
                // An <img> without a src would make new URL("") throw and
                // abort the whole rip; skip just this element instead.
                logger.warn(" Skipping image without src on " + pageURL);
                continue;
            }
            if (image.startsWith("//")) {
                // Protocol-relative URL; java.net.URL needs an explicit scheme.
                image = "http:" + image;
            }
            // Thumbnails carry a 'b' suffix right before the extension
            // (e.g. abcdefgb.jpg). Strip only that last occurrence so any
            // earlier "b." in the URL is left untouched.
            int suffix = image.lastIndexOf("b.");
            if (suffix >= 0) {
                image = image.substring(0, suffix) + image.substring(suffix + 1);
            }
            addURLToDownload(new URL(image));
        }
        page++;
        try {
            // Pause between page requests.
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            logger.error("Interrupted while waiting to load next album: ", e);
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            break;
        }
    }
}
@Override @Override
public String getHost() { public String getHost() {
@ -233,8 +275,8 @@ public class ImgurRipper extends AbstractRipper {
if (m.matches()) { if (m.matches()) {
// Root imgur account // Root imgur account
String gid = m.group(1); String gid = m.group(1);
if (gid.equals("i")) { if (gid.equals("www")) {
throw new MalformedURLException("Ripping i.imgur.com links not supported"); throw new MalformedURLException("Cannot rip the www.imgur.com homepage");
} }
albumType = ALBUM_TYPE.USER; albumType = ALBUM_TYPE.USER;
return gid; return gid;
@ -246,6 +288,19 @@ public class ImgurRipper extends AbstractRipper {
albumType = ALBUM_TYPE.USER_ALBUM; albumType = ALBUM_TYPE.USER_ALBUM;
return m.group(); return m.group();
} }
p = Pattern.compile("^https?://(www\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur subreddit aggregator
albumType = ALBUM_TYPE.SUBREDDIT;
String album = m.group(2);
for (int i = 3; i <= m.groupCount(); i++) {
if (m.group(i) != null) {
album += "_" + m.group(i).replace("/", "");
}
}
return album;
}
p = Pattern.compile("^https?://(i\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$"); p = Pattern.compile("^https?://(i\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {

View File

@ -42,11 +42,16 @@ public class ImgurRipperTest extends RippersTest {
passURLs.add(new URL("http://imgur.com/YOdjht3,x5VxH9G,5juXjJ2")); passURLs.add(new URL("http://imgur.com/YOdjht3,x5VxH9G,5juXjJ2"));
passURLs.add(new URL("http://markedone911.imgur.com")); passURLs.add(new URL("http://markedone911.imgur.com"));
passURLs.add(new URL("http://markedone911.imgur.com/")); passURLs.add(new URL("http://markedone911.imgur.com/"));
passURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
passURLs.add(new URL("http://imgur.com/r/nsfw_oc/top"));
passURLs.add(new URL("http://imgur.com/r/nsfw_oc/new"));
passURLs.add(new URL("http://imgur.com/r/nsfw_oc"));
for (URL url : passURLs) { for (URL url : passURLs) {
try { try {
ImgurRipper ripper = new ImgurRipper(url); ImgurRipper ripper = new ImgurRipper(url);
assert(ripper.canRip(url)); assert(ripper.canRip(url));
System.err.println(ripper.getWorkingDir());
deleteDir(ripper.getWorkingDir()); deleteDir(ripper.getWorkingDir());
} catch (Exception e) { } catch (Exception e) {
fail("Failed to instantiate ripper for " + url); fail("Failed to instantiate ripper for " + url);
@ -66,6 +71,7 @@ public class ImgurRipperTest extends RippersTest {
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/vertical#0")); contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/vertical#0"));
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0")); contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0")); contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
for (URL url : contentURLs) { for (URL url : contentURLs) {
try { try {
ImgurRipper ripper = new ImgurRipper(url); ImgurRipper ripper = new ImgurRipper(url);