2014-02-27 04:54:44 +01:00
|
|
|
package com.rarchives.ripme.ripper.rippers;
|
|
|
|
|
2014-04-13 01:53:49 +02:00
|
|
|
import java.io.File;
|
2014-02-27 04:54:44 +01:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.net.MalformedURLException;
|
|
|
|
import java.net.URL;
|
2014-03-11 09:29:46 +01:00
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
2014-02-27 04:54:44 +01:00
|
|
|
import java.util.regex.Matcher;
|
|
|
|
import java.util.regex.Pattern;
|
|
|
|
|
2014-02-28 12:04:03 +01:00
|
|
|
import org.json.JSONArray;
|
|
|
|
import org.json.JSONException;
|
|
|
|
import org.json.JSONObject;
|
2014-02-27 04:54:44 +01:00
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.jsoup.nodes.Element;
|
2014-04-06 21:07:53 +02:00
|
|
|
import org.jsoup.select.Elements;
|
2014-02-27 04:54:44 +01:00
|
|
|
|
2014-04-20 07:41:11 +02:00
|
|
|
import com.rarchives.ripme.ripper.AlbumRipper;
|
2014-03-01 11:13:32 +01:00
|
|
|
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
|
2014-06-22 02:08:42 +02:00
|
|
|
import com.rarchives.ripme.utils.Http;
|
2014-04-13 01:53:49 +02:00
|
|
|
import com.rarchives.ripme.utils.Utils;
|
2014-02-27 04:54:44 +01:00
|
|
|
|
2014-04-20 07:41:11 +02:00
|
|
|
public class ImgurRipper extends AlbumRipper {
|
2014-02-27 04:54:44 +01:00
|
|
|
|
|
|
|
private static final String DOMAIN = "imgur.com",
|
|
|
|
HOST = "imgur";
|
2014-03-03 09:44:07 +01:00
|
|
|
|
2014-02-28 12:04:03 +01:00
|
|
|
private final int SLEEP_BETWEEN_ALBUMS;
|
2017-03-23 10:34:37 +01:00
|
|
|
|
2014-06-12 09:23:22 +02:00
|
|
|
private Document albumDoc;
|
2014-03-03 09:44:07 +01:00
|
|
|
|
2017-10-24 16:33:28 +02:00
|
|
|
enum ALBUM_TYPE {
|
2014-02-27 04:54:44 +01:00
|
|
|
ALBUM,
|
|
|
|
USER,
|
|
|
|
USER_ALBUM,
|
2014-04-20 22:21:52 +02:00
|
|
|
USER_IMAGES,
|
2018-04-02 14:19:32 +02:00
|
|
|
SINGLE_IMAGE,
|
2014-04-06 21:07:53 +02:00
|
|
|
SERIES_OF_IMAGES,
|
|
|
|
SUBREDDIT
|
2017-10-24 16:33:28 +02:00
|
|
|
}
|
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
private ALBUM_TYPE albumType;
|
|
|
|
|
|
|
|
/**
 * Creates a ripper for the given imgur URL.
 *
 * @param url the URL to rip; handed to the superclass constructor
 * @throws IOException if the superclass constructor fails
 */
public ImgurRipper(URL url) throws IOException {
    super(url);
    // One second between albums when ripping a whole user account.
    this.SLEEP_BETWEEN_ALBUMS = 1;
}
|
|
|
|
|
2014-05-01 05:13:44 +02:00
|
|
|
/**
|
|
|
|
* Imgur ripper does not return the same URL except when ripping
|
|
|
|
* many albums at once (USER). In this case, we want duplicates.
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public boolean allowDuplicates() {
|
2016-12-21 13:16:42 +01:00
|
|
|
return albumType == ALBUM_TYPE.USER;
|
2014-05-01 05:13:44 +02:00
|
|
|
}
|
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
public boolean canRip(URL url) {
|
|
|
|
if (!url.getHost().endsWith(DOMAIN)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
getGID(url);
|
|
|
|
} catch (Exception e) {
|
|
|
|
// Can't get GID, can't rip it.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
|
|
|
String u = url.toExternalForm();
|
|
|
|
if (u.indexOf('#') >= 0) {
|
2016-12-21 16:35:17 +01:00
|
|
|
u = u.substring(0, u.indexOf('#'));
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
2015-12-21 16:36:28 +01:00
|
|
|
u = u.replace("imgur.com/gallery/", "imgur.com/a/");
|
2014-03-03 09:44:07 +01:00
|
|
|
u = u.replace("https?://m\\.imgur\\.com", "http://imgur.com");
|
|
|
|
u = u.replace("https?://i\\.imgur\\.com", "http://imgur.com");
|
2014-02-27 04:54:44 +01:00
|
|
|
return new URL(u);
|
|
|
|
}
|
|
|
|
|
2014-06-12 09:23:22 +02:00
|
|
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
|
|
|
String gid = getGID(url);
|
|
|
|
if (this.albumType == ALBUM_TYPE.ALBUM) {
|
|
|
|
try {
|
|
|
|
// Attempt to use album title as GID
|
|
|
|
if (albumDoc == null) {
|
2014-06-22 02:08:42 +02:00
|
|
|
albumDoc = Http.url(url).get();
|
2014-06-12 09:23:22 +02:00
|
|
|
}
|
2016-12-21 13:16:42 +01:00
|
|
|
|
|
|
|
Elements elems = null;
|
|
|
|
|
|
|
|
/*
|
|
|
|
// TODO: Add config option for including username in album title.
|
|
|
|
// It's possible a lot of users would not be interested in that info.
|
|
|
|
String user = null;
|
|
|
|
elems = albumDoc.select(".post-account");
|
|
|
|
if (elems.size() > 0) {
|
|
|
|
Element postAccount = elems.get(0);
|
|
|
|
if (postAccount != null) {
|
|
|
|
user = postAccount.text();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
String title = null;
|
2017-12-15 04:38:22 +01:00
|
|
|
final String defaultTitle1 = "Imgur: The most awesome images on the Internet";
|
|
|
|
final String defaultTitle2 = "Imgur: The magic of the Internet";
|
2017-03-23 10:34:37 +01:00
|
|
|
logger.info("Trying to get album title");
|
|
|
|
elems = albumDoc.select("meta[property=og:title]");
|
2017-06-22 20:04:53 +02:00
|
|
|
if (elems != null) {
|
2017-03-23 10:34:37 +01:00
|
|
|
title = elems.attr("content");
|
2017-06-22 15:48:17 +02:00
|
|
|
logger.debug("Title is " + title);
|
2017-06-22 15:37:55 +02:00
|
|
|
}
|
|
|
|
// This is here encase the album is unnamed, to prevent
|
|
|
|
// Imgur: The most awesome images on the Internet from being added onto the album name
|
2017-12-15 04:38:22 +01:00
|
|
|
if (title.contains(defaultTitle1) || title.contains(defaultTitle2)) {
|
2017-06-22 15:48:17 +02:00
|
|
|
logger.debug("Album is untitled or imgur is returning the default title");
|
|
|
|
// We set the title to "" here because if it's found in the next few attempts it will be changed
|
|
|
|
// but if it's nto found there will be no reason to set it later
|
2017-06-22 15:37:55 +02:00
|
|
|
title = "";
|
2017-06-22 15:48:17 +02:00
|
|
|
logger.debug("Trying to use title tag to get title");
|
|
|
|
elems = albumDoc.select("title");
|
2017-06-22 20:04:53 +02:00
|
|
|
if (elems != null) {
|
2017-12-15 04:38:22 +01:00
|
|
|
if (elems.text().contains(defaultTitle1) || elems.text().contains(defaultTitle2)) {
|
2017-06-22 15:48:17 +02:00
|
|
|
logger.debug("Was unable to get album title or album was untitled");
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
title = elems.text();
|
|
|
|
}
|
|
|
|
}
|
2016-12-21 13:16:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
String albumTitle = "imgur_";
|
|
|
|
/*
|
|
|
|
// TODO: Add config option (see above)
|
|
|
|
if (user != null) {
|
|
|
|
albumTitle += "user_" + user;
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
albumTitle += gid;
|
|
|
|
if (title != null) {
|
2017-06-22 15:37:55 +02:00
|
|
|
albumTitle += "_" + title;
|
2014-06-12 09:23:22 +02:00
|
|
|
}
|
2016-12-21 13:16:42 +01:00
|
|
|
|
|
|
|
return albumTitle;
|
2014-06-12 09:23:22 +02:00
|
|
|
} catch (IOException e) {
|
|
|
|
// Fall back to default album naming convention
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return getHost() + "_" + gid;
|
|
|
|
}
|
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
@Override
|
|
|
|
public void rip() throws IOException {
|
|
|
|
switch (albumType) {
|
2018-04-02 14:19:32 +02:00
|
|
|
case ALBUM:
|
|
|
|
// Fall-through
|
|
|
|
case USER_ALBUM:
|
|
|
|
logger.info("Album type is USER_ALBUM");
|
|
|
|
// Don't call getAlbumTitle(this.url) with this
|
|
|
|
// as it seems to cause the album to be downloaded to a subdir.
|
|
|
|
ripAlbum(this.url);
|
|
|
|
break;
|
|
|
|
case SERIES_OF_IMAGES:
|
|
|
|
logger.info("Album type is SERIES_OF_IMAGES");
|
|
|
|
ripAlbum(this.url);
|
|
|
|
break;
|
|
|
|
case SINGLE_IMAGE:
|
|
|
|
logger.info("Album type is SINGLE_IMAGE");
|
|
|
|
ripSingleImage(this.url);
|
|
|
|
break;
|
|
|
|
case USER:
|
|
|
|
logger.info("Album type is USER");
|
|
|
|
ripUserAccount(url);
|
|
|
|
break;
|
|
|
|
case SUBREDDIT:
|
|
|
|
logger.info("Album type is SUBREDDIT");
|
|
|
|
ripSubreddit(url);
|
|
|
|
break;
|
|
|
|
case USER_IMAGES:
|
|
|
|
logger.info("Album type is USER_IMAGES");
|
|
|
|
ripUserImages(url);
|
|
|
|
break;
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
2014-03-02 03:08:16 +01:00
|
|
|
waitForThreads();
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
|
|
|
|
2018-04-02 14:19:32 +02:00
|
|
|
private void ripSingleImage(URL url) throws IOException {
|
|
|
|
String strUrl = url.toExternalForm();
|
|
|
|
Document document = getDocument(strUrl);
|
|
|
|
Matcher m = getEmbeddedJsonMatcher(document);
|
|
|
|
if (m.matches()) {
|
|
|
|
JSONObject json = new JSONObject(m.group(1)).getJSONObject("image");
|
|
|
|
addURLToDownload(extractImageUrlFromJson(json), "");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-27 04:54:44 +01:00
|
|
|
private void ripAlbum(URL url) throws IOException {
|
2014-02-28 12:04:03 +01:00
|
|
|
ripAlbum(url, "");
|
|
|
|
}
|
|
|
|
|
|
|
|
private void ripAlbum(URL url, String subdirectory) throws IOException {
|
2014-02-27 04:54:44 +01:00
|
|
|
int index = 0;
|
2014-03-01 11:13:32 +01:00
|
|
|
this.sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
|
2014-03-11 09:29:46 +01:00
|
|
|
index = 0;
|
2014-04-13 01:53:49 +02:00
|
|
|
ImgurAlbum album = getImgurAlbum(url);
|
|
|
|
for (ImgurImage imgurImage : album.images) {
|
2014-04-18 07:11:37 +02:00
|
|
|
stopCheck();
|
2014-04-13 01:53:49 +02:00
|
|
|
String saveAs = workingDir.getCanonicalPath();
|
|
|
|
if (!saveAs.endsWith(File.separator)) {
|
|
|
|
saveAs += File.separator;
|
|
|
|
}
|
|
|
|
if (subdirectory != null && !subdirectory.equals("")) {
|
|
|
|
saveAs += subdirectory;
|
|
|
|
}
|
|
|
|
if (!saveAs.endsWith(File.separator)) {
|
|
|
|
saveAs += File.separator;
|
|
|
|
}
|
2014-04-19 20:13:56 +02:00
|
|
|
File subdirFile = new File(saveAs);
|
|
|
|
if (!subdirFile.exists()) {
|
|
|
|
subdirFile.mkdirs();
|
|
|
|
}
|
2014-03-11 09:29:46 +01:00
|
|
|
index += 1;
|
2014-05-26 09:31:58 +02:00
|
|
|
if (Utils.getConfigBoolean("download.save_order", true)) {
|
|
|
|
saveAs += String.format("%03d_", index);
|
|
|
|
}
|
|
|
|
saveAs += imgurImage.getSaveAs();
|
2018-02-11 16:04:29 +01:00
|
|
|
saveAs = saveAs.replaceAll("\\?\\d", "");
|
2014-04-13 01:53:49 +02:00
|
|
|
addURLToDownload(imgurImage.url, new File(saveAs));
|
2014-03-11 09:29:46 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-21 23:47:07 +01:00
|
|
|
public static ImgurAlbum getImgurSeries(URL url) throws IOException {
|
2016-04-17 03:56:57 +02:00
|
|
|
Pattern p = Pattern.compile("^.*imgur\\.com/([a-zA-Z0-9,]*).*$");
|
|
|
|
Matcher m = p.matcher(url.toExternalForm());
|
|
|
|
ImgurAlbum album = new ImgurAlbum(url);
|
|
|
|
if (m.matches()) {
|
|
|
|
String[] imageIds = m.group(1).split(",");
|
|
|
|
for (String imageId : imageIds) {
|
|
|
|
// TODO: Fetch image with ID imageId
|
2017-10-24 16:33:28 +02:00
|
|
|
logger.debug("Fetching image info for ID " + imageId);
|
2016-04-17 03:56:57 +02:00
|
|
|
try {
|
|
|
|
JSONObject json = Http.url("https://api.imgur.com/2/image/" + imageId + ".json").getJSON();
|
|
|
|
if (!json.has("image")) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
JSONObject image = json.getJSONObject("image");
|
|
|
|
if (!image.has("links")) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
JSONObject links = image.getJSONObject("links");
|
|
|
|
if (!links.has("original")) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
String original = links.getString("original");
|
|
|
|
ImgurImage theImage = new ImgurImage(new URL(original));
|
|
|
|
album.addImage(theImage);
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.error("Got exception while fetching imgur ID " + imageId, e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return album;
|
2015-12-21 23:47:07 +01:00
|
|
|
}
|
|
|
|
|
2014-04-13 01:53:49 +02:00
|
|
|
public static ImgurAlbum getImgurAlbum(URL url) throws IOException {
|
2016-04-17 03:56:57 +02:00
|
|
|
String strUrl = url.toExternalForm();
|
|
|
|
if (!strUrl.contains(",")) {
|
|
|
|
strUrl += "/all";
|
|
|
|
}
|
|
|
|
logger.info(" Retrieving " + strUrl);
|
2018-04-02 14:19:32 +02:00
|
|
|
Document doc = getDocument(strUrl);
|
2014-02-28 12:04:03 +01:00
|
|
|
// Try to use embedded JSON to retrieve images
|
2018-04-02 14:19:32 +02:00
|
|
|
Matcher m = getEmbeddedJsonMatcher(doc);
|
2014-02-28 12:04:03 +01:00
|
|
|
if (m.matches()) {
|
|
|
|
try {
|
|
|
|
JSONObject json = new JSONObject(m.group(1));
|
2018-04-02 14:19:32 +02:00
|
|
|
JSONArray jsonImages = json.getJSONObject("image")
|
2015-09-17 11:04:57 +02:00
|
|
|
.getJSONObject("album_images")
|
|
|
|
.getJSONArray("images");
|
2018-04-02 14:19:32 +02:00
|
|
|
return createImgurAlbumFromJsonArray(url, jsonImages);
|
2014-02-28 12:04:03 +01:00
|
|
|
} catch (JSONException e) {
|
|
|
|
logger.debug("Error while parsing JSON at " + url + ", continuing", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-07 08:41:49 +01:00
|
|
|
// TODO If album is empty, use this to check for cached images:
|
|
|
|
// http://i.rarchives.com/search.cgi?cache=http://imgur.com/a/albumID
|
|
|
|
// At the least, get the thumbnails.
|
2014-04-04 09:55:04 +02:00
|
|
|
|
|
|
|
logger.info("[!] Falling back to /noscript method");
|
|
|
|
|
|
|
|
String newUrl = url.toExternalForm() + "/noscript";
|
|
|
|
logger.info(" Retrieving " + newUrl);
|
|
|
|
doc = Jsoup.connect(newUrl)
|
|
|
|
.userAgent(USER_AGENT)
|
|
|
|
.get();
|
2014-02-28 12:04:03 +01:00
|
|
|
|
|
|
|
// Fall back to parsing HTML elements
|
|
|
|
// NOTE: This does not always get the highest-resolution images!
|
2014-04-13 01:53:49 +02:00
|
|
|
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
|
2014-02-27 04:54:44 +01:00
|
|
|
for (Element thumb : doc.select("div.image")) {
|
|
|
|
String image;
|
2018-05-30 04:48:44 +02:00
|
|
|
if (!thumb.select("a.zoom").isEmpty()) {
|
2014-02-27 04:54:44 +01:00
|
|
|
// Clickably full-size
|
|
|
|
image = "http:" + thumb.select("a").attr("href");
|
2018-05-30 04:48:44 +02:00
|
|
|
} else if (!thumb.select("img").isEmpty()) {
|
2014-02-27 04:54:44 +01:00
|
|
|
image = "http:" + thumb.select("img").attr("src");
|
|
|
|
} else {
|
|
|
|
// Unable to find image in this div
|
2014-02-28 04:49:28 +01:00
|
|
|
logger.error("[!] Unable to find image in div: " + thumb.toString());
|
2014-02-27 04:54:44 +01:00
|
|
|
continue;
|
|
|
|
}
|
2015-12-24 22:40:04 +01:00
|
|
|
if (image.endsWith(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) {
|
2016-04-17 03:56:57 +02:00
|
|
|
image = image.replace(".gif", ".mp4");
|
2015-12-24 22:40:04 +01:00
|
|
|
}
|
2014-04-13 01:53:49 +02:00
|
|
|
ImgurImage imgurImage = new ImgurImage(new URL(image));
|
|
|
|
imgurAlbum.addImage(imgurImage);
|
2014-02-28 12:04:03 +01:00
|
|
|
}
|
2014-04-13 01:53:49 +02:00
|
|
|
return imgurAlbum;
|
2014-02-28 12:04:03 +01:00
|
|
|
}
|
2017-03-23 10:34:37 +01:00
|
|
|
|
2018-04-02 14:19:32 +02:00
|
|
|
private static Matcher getEmbeddedJsonMatcher(Document doc) {
|
|
|
|
Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL);
|
|
|
|
return p.matcher(doc.body().html());
|
|
|
|
}
|
|
|
|
|
|
|
|
private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException {
|
|
|
|
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
|
|
|
|
int imagesLength = jsonImages.length();
|
|
|
|
for (int i = 0; i < imagesLength; i++) {
|
|
|
|
JSONObject jsonImage = jsonImages.getJSONObject(i);
|
|
|
|
imgurAlbum.addImage(createImgurImageFromJson(jsonImage));
|
|
|
|
}
|
|
|
|
return imgurAlbum;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static ImgurImage createImgurImageFromJson(JSONObject json) throws MalformedURLException {
|
|
|
|
return new ImgurImage(extractImageUrlFromJson(json));
|
|
|
|
}
|
|
|
|
|
|
|
|
private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException {
|
|
|
|
String ext = json.getString("ext");
|
|
|
|
if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) {
|
|
|
|
ext = ".mp4";
|
|
|
|
}
|
|
|
|
return new URL(
|
|
|
|
"http://i.imgur.com/"
|
|
|
|
+ json.getString("hash")
|
|
|
|
+ ext);
|
|
|
|
}
|
|
|
|
|
|
|
|
private static Document getDocument(String strUrl) throws IOException {
|
|
|
|
return Jsoup.connect(strUrl)
|
|
|
|
.userAgent(USER_AGENT)
|
|
|
|
.timeout(10 * 1000)
|
|
|
|
.maxBodySize(0)
|
|
|
|
.get();
|
|
|
|
}
|
|
|
|
|
2014-02-28 12:04:03 +01:00
|
|
|
/**
|
|
|
|
* Rips all albums in an imgur user's account.
|
|
|
|
* @param url
|
|
|
|
* URL to imgur user account (http://username.imgur.com)
|
|
|
|
* @throws IOException
|
|
|
|
*/
|
|
|
|
private void ripUserAccount(URL url) throws IOException {
|
2014-06-20 13:09:36 +02:00
|
|
|
logger.info("Retrieving " + url);
|
|
|
|
sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());
|
2014-06-22 02:08:42 +02:00
|
|
|
Document doc = Http.url(url).get();
|
2014-02-28 12:04:03 +01:00
|
|
|
for (Element album : doc.select("div.cover a")) {
|
2014-04-18 07:11:37 +02:00
|
|
|
stopCheck();
|
2014-02-28 12:04:03 +01:00
|
|
|
if (!album.hasAttr("href")
|
|
|
|
|| !album.attr("href").contains("imgur.com/a/")) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
String albumID = album.attr("href").substring(album.attr("href").lastIndexOf('/') + 1);
|
|
|
|
URL albumURL = new URL("http:" + album.attr("href") + "/noscript");
|
|
|
|
try {
|
|
|
|
ripAlbum(albumURL, albumID);
|
|
|
|
Thread.sleep(SLEEP_BETWEEN_ALBUMS * 1000);
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.error("Error while ripping album: " + e.getMessage(), e);
|
|
|
|
}
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
|
|
|
}
|
2017-03-23 10:34:37 +01:00
|
|
|
|
2014-04-20 22:21:52 +02:00
|
|
|
private void ripUserImages(URL url) throws IOException {
|
|
|
|
int page = 0; int imagesFound = 0; int imagesTotal = 0;
|
|
|
|
String jsonUrl = url.toExternalForm().replace("/all", "/ajax/images");
|
|
|
|
if (jsonUrl.contains("#")) {
|
|
|
|
jsonUrl = jsonUrl.substring(0, jsonUrl.indexOf("#"));
|
|
|
|
}
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
try {
|
|
|
|
page++;
|
|
|
|
String jsonUrlWithParams = jsonUrl + "?sort=0&order=1&album=0&page=" + page + "&perPage=60";
|
2014-06-22 02:08:42 +02:00
|
|
|
JSONObject json = Http.url(jsonUrlWithParams).getJSON();
|
2014-04-20 22:21:52 +02:00
|
|
|
JSONObject jsonData = json.getJSONObject("data");
|
|
|
|
if (jsonData.has("count")) {
|
|
|
|
imagesTotal = jsonData.getInt("count");
|
|
|
|
}
|
|
|
|
JSONArray images = jsonData.getJSONArray("images");
|
|
|
|
for (int i = 0; i < images.length(); i++) {
|
|
|
|
imagesFound++;
|
|
|
|
JSONObject image = images.getJSONObject(i);
|
|
|
|
String imageUrl = "http://i.imgur.com/" + image.getString("hash") + image.getString("ext");
|
2014-05-26 09:31:58 +02:00
|
|
|
String prefix = "";
|
|
|
|
if (Utils.getConfigBoolean("download.save_order", true)) {
|
|
|
|
prefix = String.format("%03d_", imagesFound);
|
|
|
|
}
|
|
|
|
addURLToDownload(new URL(imageUrl), prefix);
|
2014-04-20 22:21:52 +02:00
|
|
|
}
|
|
|
|
if (imagesFound >= imagesTotal) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
Thread.sleep(1000);
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.error("Error while ripping user images: " + e.getMessage(), e);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-03-23 10:34:37 +01:00
|
|
|
|
2014-04-06 21:07:53 +02:00
|
|
|
private void ripSubreddit(URL url) throws IOException {
|
|
|
|
int page = 0;
|
|
|
|
while (true) {
|
2014-04-18 07:11:37 +02:00
|
|
|
stopCheck();
|
2014-04-06 21:07:53 +02:00
|
|
|
String pageURL = url.toExternalForm();
|
|
|
|
if (!pageURL.endsWith("/")) {
|
|
|
|
pageURL += "/";
|
|
|
|
}
|
|
|
|
pageURL += "page/" + page + "/miss?scrolled";
|
|
|
|
logger.info(" Retrieving " + pageURL);
|
2014-06-22 02:08:42 +02:00
|
|
|
Document doc = Http.url(pageURL).get();
|
2014-04-06 21:07:53 +02:00
|
|
|
Elements imgs = doc.select(".post img");
|
|
|
|
for (Element img : imgs) {
|
|
|
|
String image = img.attr("src");
|
|
|
|
if (image.startsWith("//")) {
|
|
|
|
image = "http:" + image;
|
|
|
|
}
|
|
|
|
if (image.contains("b.")) {
|
|
|
|
image = image.replace("b.", ".");
|
|
|
|
}
|
|
|
|
URL imageURL = new URL(image);
|
|
|
|
addURLToDownload(imageURL);
|
|
|
|
}
|
2018-05-30 04:48:44 +02:00
|
|
|
if (imgs.isEmpty()) {
|
2014-04-06 21:07:53 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
page++;
|
|
|
|
try {
|
|
|
|
Thread.sleep(1000);
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
logger.error("Interrupted while waiting to load next album: ", e);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-02-27 04:54:44 +01:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getHost() {
|
|
|
|
return HOST;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getGID(URL url) throws MalformedURLException {
|
2016-12-21 13:16:42 +01:00
|
|
|
Pattern p = null;
|
|
|
|
Matcher m = null;
|
|
|
|
|
2016-12-21 16:35:17 +01:00
|
|
|
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery)/([a-zA-Z0-9]{5,}).*$");
|
2015-12-21 16:36:28 +01:00
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
2016-12-21 13:16:42 +01:00
|
|
|
// Imgur album or gallery
|
2015-12-21 16:36:28 +01:00
|
|
|
albumType = ALBUM_TYPE.ALBUM;
|
|
|
|
String gid = m.group(m.groupCount());
|
|
|
|
this.url = new URL("http://imgur.com/a/" + gid);
|
|
|
|
return gid;
|
|
|
|
}
|
2017-11-09 07:45:32 +01:00
|
|
|
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/(a|gallery|t)/[a-zA-Z0-9]*/([a-zA-Z0-9]{5,}).*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Imgur album or gallery
|
|
|
|
albumType = ALBUM_TYPE.ALBUM;
|
|
|
|
String gid = m.group(m.groupCount());
|
|
|
|
this.url = new URL("http://imgur.com/a/" + gid);
|
|
|
|
return gid;
|
|
|
|
}
|
2014-03-03 09:44:07 +01:00
|
|
|
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/?$");
|
2014-02-27 04:54:44 +01:00
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Root imgur account
|
2014-03-03 09:44:07 +01:00
|
|
|
String gid = m.group(1);
|
2014-04-06 21:07:53 +02:00
|
|
|
if (gid.equals("www")) {
|
|
|
|
throw new MalformedURLException("Cannot rip the www.imgur.com homepage");
|
2014-03-03 09:44:07 +01:00
|
|
|
}
|
2014-02-27 04:54:44 +01:00
|
|
|
albumType = ALBUM_TYPE.USER;
|
2016-12-21 13:16:42 +01:00
|
|
|
return "user_" + gid;
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
2014-04-20 22:21:52 +02:00
|
|
|
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/all.*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Imgur account images
|
|
|
|
albumType = ALBUM_TYPE.USER_IMAGES;
|
|
|
|
return m.group(1) + "_images";
|
|
|
|
}
|
2014-05-03 08:13:31 +02:00
|
|
|
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{3,})\\.imgur\\.com/([a-zA-Z0-9\\-_]+).*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Imgur account album
|
|
|
|
albumType = ALBUM_TYPE.USER_ALBUM;
|
|
|
|
return m.group(1) + "-" + m.group(2);
|
|
|
|
}
|
2016-12-21 16:35:17 +01:00
|
|
|
p = Pattern.compile("^https?://(www\\.|m\\.)?imgur\\.com/r/([a-zA-Z0-9\\-_]{3,})(/top|/new)?(/all|/year|/month|/week|/day)?/?$");
|
2014-04-06 21:07:53 +02:00
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Imgur subreddit aggregator
|
|
|
|
albumType = ALBUM_TYPE.SUBREDDIT;
|
|
|
|
String album = m.group(2);
|
|
|
|
for (int i = 3; i <= m.groupCount(); i++) {
|
|
|
|
if (m.group(i) != null) {
|
|
|
|
album += "_" + m.group(i).replace("/", "");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return album;
|
|
|
|
}
|
2016-12-21 16:35:17 +01:00
|
|
|
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/r/(\\w+)/([a-zA-Z0-9,]{5,}).*$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Imgur subreddit album or image (treat as album)
|
|
|
|
albumType = ALBUM_TYPE.ALBUM;
|
|
|
|
String subreddit = m.group(m.groupCount() - 1);
|
|
|
|
String gid = m.group(m.groupCount());
|
|
|
|
this.url = new URL("http://imgur.com/r/" + subreddit + "/" + gid);
|
|
|
|
return "r_" + subreddit + "_" + gid;
|
|
|
|
}
|
2018-04-02 14:19:32 +02:00
|
|
|
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9]{5,})$");
|
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Single imgur image
|
|
|
|
albumType = ALBUM_TYPE.SINGLE_IMAGE;
|
|
|
|
return m.group(m.groupCount());
|
|
|
|
}
|
2015-12-21 23:47:07 +01:00
|
|
|
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
|
2014-02-27 04:54:44 +01:00
|
|
|
m = p.matcher(url.toExternalForm());
|
|
|
|
if (m.matches()) {
|
|
|
|
// Series of imgur images
|
|
|
|
albumType = ALBUM_TYPE.SERIES_OF_IMAGES;
|
2014-03-03 09:44:07 +01:00
|
|
|
String gid = m.group(m.groupCount());
|
|
|
|
if (!gid.contains(",")) {
|
|
|
|
throw new MalformedURLException("Imgur image doesn't contain commas");
|
|
|
|
}
|
|
|
|
return gid.replaceAll(",", "-");
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
2014-05-03 08:13:31 +02:00
|
|
|
throw new MalformedURLException("Unsupported imgur URL format: " + url.toExternalForm());
|
2014-02-27 04:54:44 +01:00
|
|
|
}
|
|
|
|
|
2014-03-03 09:44:07 +01:00
|
|
|
public ALBUM_TYPE getAlbumType() {
|
|
|
|
return albumType;
|
|
|
|
}
|
2014-04-13 01:53:49 +02:00
|
|
|
|
|
|
|
public static class ImgurImage {
|
2017-10-24 16:33:28 +02:00
|
|
|
String title = "";
|
|
|
|
String description = "";
|
|
|
|
String extension = "";
|
2014-04-13 01:53:49 +02:00
|
|
|
public URL url = null;
|
|
|
|
|
2017-10-24 16:33:28 +02:00
|
|
|
ImgurImage(URL url) {
|
2014-04-13 01:53:49 +02:00
|
|
|
this.url = url;
|
|
|
|
String tempUrl = url.toExternalForm();
|
|
|
|
this.extension = tempUrl.substring(tempUrl.lastIndexOf('.'));
|
2014-04-20 22:21:52 +02:00
|
|
|
if (this.extension.contains("?")) {
|
|
|
|
this.extension = this.extension.substring(0, this.extension.indexOf("?"));
|
|
|
|
}
|
2014-04-13 01:53:49 +02:00
|
|
|
}
|
2017-10-24 16:33:28 +02:00
|
|
|
ImgurImage(URL url, String title) {
|
2014-04-13 01:53:49 +02:00
|
|
|
this(url);
|
|
|
|
this.title = title;
|
|
|
|
}
|
|
|
|
public ImgurImage(URL url, String title, String description) {
|
|
|
|
this(url, title);
|
|
|
|
this.description = description;
|
|
|
|
}
|
2017-10-24 16:33:28 +02:00
|
|
|
String getSaveAs() {
|
2014-04-13 01:53:49 +02:00
|
|
|
String saveAs = this.title;
|
|
|
|
String u = url.toExternalForm();
|
2014-04-20 22:21:52 +02:00
|
|
|
if (u.contains("?")) {
|
|
|
|
u = u.substring(0, u.indexOf("?"));
|
|
|
|
}
|
2014-04-13 01:53:49 +02:00
|
|
|
String imgId = u.substring(u.lastIndexOf('/') + 1, u.lastIndexOf('.'));
|
|
|
|
if (saveAs == null || saveAs.equals("")) {
|
|
|
|
saveAs = imgId;
|
|
|
|
} else {
|
|
|
|
saveAs = saveAs + "_" + imgId;
|
|
|
|
}
|
|
|
|
saveAs = Utils.filesystemSafe(saveAs);
|
|
|
|
return saveAs + this.extension;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public static class ImgurAlbum {
|
2017-10-24 16:33:28 +02:00
|
|
|
String title = null;
|
2014-04-13 01:53:49 +02:00
|
|
|
public URL url = null;
|
2017-10-24 16:33:28 +02:00
|
|
|
public List<ImgurImage> images = new ArrayList<>();
|
|
|
|
ImgurAlbum(URL url) {
|
2014-04-13 01:53:49 +02:00
|
|
|
this.url = url;
|
|
|
|
}
|
|
|
|
public ImgurAlbum(URL url, String title) {
|
|
|
|
this(url);
|
|
|
|
this.title = title;
|
|
|
|
}
|
2017-10-24 16:33:28 +02:00
|
|
|
void addImage(ImgurImage image) {
|
2014-04-13 01:53:49 +02:00
|
|
|
images.add(image);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-12-21 13:16:42 +01:00
|
|
|
}
|