// Source: ImgScroll/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java
// (143 lines, 4.3 KiB, Java)

package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractRipper;
public class ImgurRipper extends AbstractRipper {
private static final String DOMAIN = "imgur.com",
HOST = "imgur";
private static final Logger logger = Logger.getLogger(ImgurRipper.class);
static enum ALBUM_TYPE {
ALBUM,
USER,
USER_ALBUM,
SERIES_OF_IMAGES
};
private ALBUM_TYPE albumType;
public ImgurRipper(URL url) throws IOException {
super(url);
}
public void processURL(URL url, String prefix) {
logger.debug("Found URL: " + url);
addURLToDownload(url, prefix);
}
public boolean canRip(URL url) {
if (!url.getHost().endsWith(DOMAIN)) {
return false;
}
try {
getGID(url);
} catch (Exception e) {
// Can't get GID, can't rip it.
return false;
}
return true;
}
public URL sanitizeURL(URL url) throws MalformedURLException {
String u = url.toExternalForm();
if (u.indexOf('#') >= 0) {
u = u.substring(0, u.indexOf('#'));
}
return new URL(u);
}
@Override
public void rip() throws IOException {
switch (albumType) {
case ALBUM:
this.url = new URL(this.url.toExternalForm() + "/noscript");
// Fall-through
case USER_ALBUM:
ripAlbum(this.url);
break;
case SERIES_OF_IMAGES:
// TODO Get all images
break;
case USER:
// TODO Get all albums by user
break;
}
threadPool.waitForThreads();
}
private void ripAlbum(URL url) throws IOException {
int index = 0;
2014-02-28 04:49:28 +01:00
logger.info("[ ] Retrieving " + url.toExternalForm());
Document doc = Jsoup.connect(url.toExternalForm()).get();
for (Element thumb : doc.select("div.image")) {
String image;
if (thumb.select("a.zoom").size() > 0) {
// Clickably full-size
image = "http:" + thumb.select("a").attr("href");
} else if (thumb.select("img").size() > 0) {
image = "http:" + thumb.select("img").attr("src");
} else {
// Unable to find image in this div
2014-02-28 04:49:28 +01:00
logger.error("[!] Unable to find image in div: " + thumb.toString());
continue;
}
index += 1;
processURL(new URL(image), String.format("%03d_", index));
}
}
@Override
public String getHost() {
return HOST;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://(m\\.)?imgur\\.com/a/([a-zA-Z0-9]{5,8}).*$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur album
albumType = ALBUM_TYPE.ALBUM;
String gid = m.group(m.groupCount());
this.url = new URL("http://imgur.com/a/" + gid);
return gid;
}
p = Pattern.compile("^https?://([a-zA-Z0-9\\-])\\.imgur\\.com/?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Root imgur account
albumType = ALBUM_TYPE.USER;
return m.group(m.groupCount());
}
p = Pattern.compile("^https?://([a-zA-Z0-9\\-])\\.imgur\\.com/([a-zA-Z0-9])?$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Imgur account album
albumType = ALBUM_TYPE.USER_ALBUM;
return m.group();
}
p = Pattern.compile("^https?://(i\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Series of imgur images
albumType = ALBUM_TYPE.SERIES_OF_IMAGES;
return m.group();
}
throw new MalformedURLException("Unexpected URL format: " + url.toExternalForm());
}
}