Added support for imgur.com/r/subreddit albums

4pr0n 2014-04-06 12:07:53 -07:00
parent 0863a9328c
commit e904a7ee97
3 changed files with 65 additions and 4 deletions

pom.xml

@@ -4,7 +4,7 @@
    <groupId>com.rarchives.ripme</groupId>
    <artifactId>ripme</artifactId>
    <packaging>jar</packaging>
    <version>1.0.4</version>
    <version>1.0.6</version>
    <name>ripme</name>
    <url>http://rip.rarchives.com</url>
    <properties>

ImgurRipper.java

@@ -15,6 +15,7 @@ import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
@@ -31,7 +32,8 @@ public class ImgurRipper extends AbstractRipper {
        ALBUM,
        USER,
        USER_ALBUM,
        SERIES_OF_IMAGES
        SERIES_OF_IMAGES,
        SUBREDDIT
    };
    private ALBUM_TYPE albumType;
@@ -85,6 +87,9 @@ public class ImgurRipper extends AbstractRipper {
            // TODO Get all albums by user
            ripUserAccount(url);
            break;
        case SUBREDDIT:
            ripSubreddit(url);
            break;
        }
        waitForThreads();
    }
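
For readers skimming the diff: getGID() (changed further down) classifies imgur.com/r/ URLs as the new SUBREDDIT type, and rip() then routes them to ripSubreddit() via the case added above. A minimal, self-contained sketch of that classify-then-dispatch flow; the class and method names here are hypothetical, not taken from the commit:

    public class AlbumTypeDemo {
        enum ALBUM_TYPE { ALBUM, USER, USER_ALBUM, SERIES_OF_IMAGES, SUBREDDIT }

        // Stand-in for getGID()'s URL classification.
        static ALBUM_TYPE classify(String url) {
            return url.contains("imgur.com/r/") ? ALBUM_TYPE.SUBREDDIT : ALBUM_TYPE.ALBUM;
        }

        public static void main(String[] args) {
            // Stand-in for rip()'s switch on albumType.
            switch (classify("http://imgur.com/r/earthporn")) {
                case SUBREDDIT:
                    System.out.println("would call ripSubreddit(url)");
                    break;
                default:
                    System.out.println("other ripper path");
            }
        }
    }
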
@@ -212,6 +217,43 @@ public class ImgurRipper extends AbstractRipper {
            }
        }
    }
    private void ripSubreddit(URL url) throws IOException {
        int page = 0;
        while (true) {
            String pageURL = url.toExternalForm();
            if (!pageURL.endsWith("/")) {
                pageURL += "/";
            }
            pageURL += "page/" + page + "/miss?scrolled";
            logger.info(" Retrieving " + pageURL);
            Document doc = Jsoup.connect(pageURL)
                                .userAgent(USER_AGENT)
                                .get();
            Elements imgs = doc.select(".post img");
            for (Element img : imgs) {
                String image = img.attr("src");
                if (image.startsWith("//")) {
                    image = "http:" + image;
                }
                if (image.contains("b.")) {
                    image = image.replace("b.", ".");
                }
                URL imageURL = new URL(image);
                addURLToDownload(imageURL);
            }
            if (imgs.size() == 0) {
                break;
            }
            page++;
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                logger.error("Interrupted while waiting to load next album: ", e);
                break;
            }
        }
    }
    @Override
    public String getHost() {
        return HOST;
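
The new ripSubreddit() walks imgur's subreddit view page by page (/r/<subreddit>/page/<n>/miss?scrolled), collects every image under .post img, and stops at the first empty page. Each src is normalized: protocol-relative URLs get an http: prefix, and imgur's small-thumbnail marker (the extra "b" before the extension) is stripped so the full-size image is downloaded. A standalone sketch of that normalization step; the class name and sample URL are illustrative, not part of the commit:

    public class ImgurThumbDemo {
        // Mirrors the src cleanup in ripSubreddit(): make the URL absolute,
        // then drop the "b" thumbnail marker so the full-size image is fetched.
        static String normalize(String src) {
            if (src.startsWith("//")) {
                src = "http:" + src;
            }
            if (src.contains("b.")) {
                src = src.replace("b.", ".");
            }
            return src;
        }

        public static void main(String[] args) {
            // Prints: http://i.imgur.com/Abc1234.jpg
            System.out.println(normalize("//i.imgur.com/Abc1234b.jpg"));
        }
    }
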
@@ -233,8 +275,8 @@ public class ImgurRipper extends AbstractRipper {
        if (m.matches()) {
            // Root imgur account
            String gid = m.group(1);
            if (gid.equals("i")) {
                throw new MalformedURLException("Ripping i.imgur.com links not supported");
            if (gid.equals("www")) {
                throw new MalformedURLException("Cannot rip the www.imgur.com homepage");
            }
            albumType = ALBUM_TYPE.USER;
            return gid;
@@ -246,6 +288,19 @@ public class ImgurRipper extends AbstractRipper {
            albumType = ALBUM_TYPE.USER_ALBUM;
            return m.group();
        }
        p = Pattern.compile("^https?://(www\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            // Imgur subreddit aggregator
            albumType = ALBUM_TYPE.SUBREDDIT;
            String album = m.group(2);
            for (int i = 3; i <= m.groupCount(); i++) {
                if (m.group(i) != null) {
                    album += "_" + m.group(i).replace("/", "");
                }
            }
            return album;
        }
        p = Pattern.compile("^https?://(i\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
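
The subreddit pattern added above also determines the album (directory) name: group 2 is the subreddit, and the optional /top|/new and /all|/year|/month|/week segments are appended with underscores. A small, runnable sketch of that mapping; the class name and sample URL are illustrative only:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class SubredditGidDemo {
        public static void main(String[] args) {
            Pattern p = Pattern.compile(
                    "^https?://(www\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week)?/?$");
            Matcher m = p.matcher("http://imgur.com/r/nsfw_oc/top/all");
            if (m.matches()) {
                String album = m.group(2);                  // "nsfw_oc"
                for (int i = 3; i <= m.groupCount(); i++) { // optional sort/time segments
                    if (m.group(i) != null) {
                        album += "_" + m.group(i).replace("/", "");
                    }
                }
                System.out.println(album);                  // prints "nsfw_oc_top_all"
            }
        }
    }
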

ImgurRipperTest.java

@@ -42,11 +42,16 @@ public class ImgurRipperTest extends RippersTest {
        passURLs.add(new URL("http://imgur.com/YOdjht3,x5VxH9G,5juXjJ2"));
        passURLs.add(new URL("http://markedone911.imgur.com"));
        passURLs.add(new URL("http://markedone911.imgur.com/"));
        passURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
        passURLs.add(new URL("http://imgur.com/r/nsfw_oc/top"));
        passURLs.add(new URL("http://imgur.com/r/nsfw_oc/new"));
        passURLs.add(new URL("http://imgur.com/r/nsfw_oc"));
        for (URL url : passURLs) {
            try {
                ImgurRipper ripper = new ImgurRipper(url);
                assert(ripper.canRip(url));
                System.err.println(ripper.getWorkingDir());
                deleteDir(ripper.getWorkingDir());
            } catch (Exception e) {
                fail("Failed to instantiate ripper for " + url);
@@ -66,6 +71,7 @@ public class ImgurRipperTest extends RippersTest {
        contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/vertical#0"));
        contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/horizontal#0"));
        contentURLs.add(new URL("http://imgur.com/a/WxG6f/layout/grid#0"));
        contentURLs.add(new URL("http://imgur.com/r/nsfw_oc/top/all"));
        for (URL url : contentURLs) {
            try {
                ImgurRipper ripper = new ImgurRipper(url);