2014-02-27 04:54:44 +01:00
|
|
|
package com.rarchives.ripme.ripper.rippers;
|
|
|
|
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.net.URL;
|
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
|
|
|
import org.apache.log4j.Logger;
|
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.jsoup.nodes.Element;
|
|
|
|
|
|
|
|
import com.rarchives.ripme.ripper.AbstractRipper;
|
|
|
|
|
|
|
|
public class ImgurRipper extends AbstractRipper {
|
|
|
|
|
|
|
|
private static final String DOMAIN = "imgur.com",
|
|
|
|
HOST = "imgur";
|
|
|
|
private static final Logger logger = Logger.getLogger(ImgurRipper.class);
|
|
|
|
|
|
|
|
static enum ALBUM_TYPE {
|
|
|
|
ALBUM,
|
|
|
|
USER,
|
|
|
|
USER_ALBUM,
|
|
|
|
SERIES_OF_IMAGES
|
|
|
|
};
|
|
|
|
private ALBUM_TYPE albumType;
|
|
|
|
|
|
|
|
public ImgurRipper(URL url) throws IOException {
|
|
|
|
super(url);
|
|
|
|
}
|
|
|
|
|
|
|
|
public void processURL(URL url, String prefix) {
|
2014-02-27 10:28:23 +01:00
|
|
|
logger.debug("Found URL: " + url);
|
2014-02-27 04:54:44 +01:00
|
|
|
addURLToDownload(url, prefix);
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean canRip(URL url) {
|
|
|
|
if (!url.getHost().endsWith(DOMAIN)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
getGID(url);
|
|
|
|
} catch (Exception e) {
|
|
|
|
// Can't get GID, can't rip it.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
|
|
|
String u = url.toExternalForm();
|
|
|
|
if (u.indexOf('#') >= 0) {
|
|
|
|
u = u.substring(0, u.indexOf('#'));
|
|
|
|
}
|
|
|
|
return new URL(u);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void rip() throws IOException {
|
|
|
|
switch (albumType) {
|
|
|
|
case ALBUM:
|
|
|
|
this.url = new URL(this.url.toExternalForm() + "/noscript");
|
|
|
|
// Fall-through
|
|
|
|
case USER_ALBUM:
|
|
|
|
ripAlbum(this.url);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SERIES_OF_IMAGES:
|
|
|
|
// TODO Get all images
|
|
|
|
break;
|
|
|
|
|
|
|
|
case USER:
|
|
|
|
// TODO Get all albums by user
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
threadPool.waitForThreads();
|
|
|
|
}
|
|
|
|
|
|
|
|
private void ripAlbum(URL url) throws IOException {
|
|
|
|
int index = 0;
|
2014-02-28 04:49:28 +01:00
|
|
|
logger.info("[ ] Retrieving " + url.toExternalForm());
|
2014-02-27 04:54:44 +01:00
|
|
|
Document doc = Jsoup.connect(url.toExternalForm()).get();
|
|
|
|
for (Element thumb : doc.select("div.image")) {
|
|
|
|
String image;
|
|
|
|
if (thumb.select("a.zoom").size() > 0) {
|
|
|
|
// Clickably full-size
|
|
|
|
image = "http:" + thumb.select("a").attr("href");
|
|
|
|
} else if (thumb.select("img").size() > 0) {
|
|
|
|
image = "http:" + thumb.select("img").attr("src");
|
|
|
|
} else {
|
|
|
|
// Unable to find image in this div
|
2014-02-28 04:49:28 +01:00
|
|
|
logger.error("[!] Unable to find image in div: " + thumb.toString());
|
2014-02-27 04:54:44 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
index += 1;
|
|
|
|
processURL(new URL(image), String.format("%03d_", index));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getHost() {
|
|
|
|
return HOST;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getGID(URL url) throws MalformedURLException {
|
|
|
|
Pattern p = Pattern.compile("^https?://(m\\.)?imgur\\.com/a/([a-zA-Z0-9]{5,8}).*$");
|
|
|
|
Matcher m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Imgur album
|
|
|
|
albumType = ALBUM_TYPE.ALBUM;
|
|
|
|
String gid = m.group(m.groupCount());
|
|
|
|
this.url = new URL("http://imgur.com/a/" + gid);
|
|
|
|
return gid;
|
|
|
|
}
|
|
|
|
p = Pattern.compile("^https?://([a-zA-Z0-9\\-])\\.imgur\\.com/?$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Root imgur account
|
|
|
|
albumType = ALBUM_TYPE.USER;
|
|
|
|
return m.group(m.groupCount());
|
|
|
|
}
|
|
|
|
p = Pattern.compile("^https?://([a-zA-Z0-9\\-])\\.imgur\\.com/([a-zA-Z0-9])?$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Imgur account album
|
|
|
|
albumType = ALBUM_TYPE.USER_ALBUM;
|
|
|
|
return m.group();
|
|
|
|
}
|
|
|
|
p = Pattern.compile("^https?://(i\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Series of imgur images
|
|
|
|
albumType = ALBUM_TYPE.SERIES_OF_IMAGES;
|
|
|
|
return m.group();
|
|
|
|
}
|
|
|
|
throw new MalformedURLException("Unexpected URL format: " + url.toExternalForm());
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|