Fixes #410: Imgur - Cannot download albums containing only a single image

This commit is contained in:
avesther 2018-04-02 14:19:32 +02:00
parent fc0e3588b6
commit 3716bbfb54
2 changed files with 100 additions and 54 deletions

View File

@ -36,6 +36,7 @@ public class ImgurRipper extends AlbumRipper {
USER, USER,
USER_ALBUM, USER_ALBUM,
USER_IMAGES, USER_IMAGES,
SINGLE_IMAGE,
SERIES_OF_IMAGES, SERIES_OF_IMAGES,
SUBREDDIT SUBREDDIT
} }
@ -155,34 +156,48 @@ public class ImgurRipper extends AlbumRipper {
@Override @Override
public void rip() throws IOException { public void rip() throws IOException {
switch (albumType) { switch (albumType) {
case ALBUM: case ALBUM:
// Fall-through // Fall-through
case USER_ALBUM: case USER_ALBUM:
logger.info("Album type is USER_ALBUM"); logger.info("Album type is USER_ALBUM");
// Don't call getAlbumTitle(this.url) with this // Don't call getAlbumTitle(this.url) with this
// as it seems to cause the album to be downloaded to a subdir. // as it seems to cause the album to be downloaded to a subdir.
ripAlbum(this.url); ripAlbum(this.url);
break; break;
case SERIES_OF_IMAGES: case SERIES_OF_IMAGES:
logger.info("Album type is SERIES_OF_IMAGES"); logger.info("Album type is SERIES_OF_IMAGES");
ripAlbum(this.url); ripAlbum(this.url);
break; break;
case USER: case SINGLE_IMAGE:
logger.info("Album type is USER"); logger.info("Album type is SINGLE_IMAGE");
ripUserAccount(url); ripSingleImage(this.url);
break; break;
case SUBREDDIT: case USER:
logger.info("Album type is SUBREDDIT"); logger.info("Album type is USER");
ripSubreddit(url); ripUserAccount(url);
break; break;
case USER_IMAGES: case SUBREDDIT:
logger.info("Album type is USER_IMAGES"); logger.info("Album type is SUBREDDIT");
ripUserImages(url); ripSubreddit(url);
break; break;
case USER_IMAGES:
logger.info("Album type is USER_IMAGES");
ripUserImages(url);
break;
} }
waitForThreads(); waitForThreads();
} }
/**
 * Rips a single imgur image page (as opposed to an album).
 *
 * Fetches the page, locates the gallery configuration JSON embedded in it,
 * and queues the image described by its "image" object for download.
 *
 * @param url
 *      URL of the single-image page.
 * @throws IOException
 *      If the page cannot be fetched, or if it does not contain the
 *      embedded gallery JSON (in which case there is nothing to download).
 */
private void ripSingleImage(URL url) throws IOException {
    String strUrl = url.toExternalForm();
    Document document = getDocument(strUrl);
    Matcher m = getEmbeddedJsonMatcher(document);
    if (!m.matches()) {
        // Previously this case returned silently, leaving the rip to finish
        // without queueing anything and without telling the caller why.
        throw new IOException("Could not find embedded image JSON at " + strUrl);
    }
    JSONObject json = new JSONObject(m.group(1)).getJSONObject("image");
    addURLToDownload(extractImageUrlFromJson(json), "");
}
private void ripAlbum(URL url) throws IOException { private void ripAlbum(URL url) throws IOException {
ripAlbum(url, ""); ripAlbum(url, "");
} }
@ -257,38 +272,16 @@ public class ImgurRipper extends AlbumRipper {
strUrl += "/all"; strUrl += "/all";
} }
logger.info(" Retrieving " + strUrl); logger.info(" Retrieving " + strUrl);
Document doc = Jsoup.connect(strUrl) Document doc = getDocument(strUrl);
.userAgent(USER_AGENT)
.timeout(10 * 1000)
.maxBodySize(0)
.get();
// Try to use embedded JSON to retrieve images // Try to use embedded JSON to retrieve images
Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL); Matcher m = getEmbeddedJsonMatcher(doc);
Matcher m = p.matcher(doc.body().html());
if (m.matches()) { if (m.matches()) {
try { try {
ImgurAlbum imgurAlbum = new ImgurAlbum(url);
JSONObject json = new JSONObject(m.group(1)); JSONObject json = new JSONObject(m.group(1));
JSONArray images = json.getJSONObject("image") JSONArray jsonImages = json.getJSONObject("image")
.getJSONObject("album_images") .getJSONObject("album_images")
.getJSONArray("images"); .getJSONArray("images");
int imagesLength = images.length(); return createImgurAlbumFromJsonArray(url, jsonImages);
for (int i = 0; i < imagesLength; i++) {
JSONObject image = images.getJSONObject(i);
String ext = image.getString("ext");
if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) {
ext = ".mp4";
}
URL imageURL = new URL(
"http://i.imgur.com/"
+ image.getString("hash")
+ ext);
ImgurImage imgurImage = new ImgurImage(imageURL);
imgurImage.extension = ext;
imgurAlbum.addImage(imgurImage);
}
return imgurAlbum;
} catch (JSONException e) { } catch (JSONException e) {
logger.debug("Error while parsing JSON at " + url + ", continuing", e); logger.debug("Error while parsing JSON at " + url + ", continuing", e);
} }
@ -330,6 +323,44 @@ public class ImgurRipper extends AlbumRipper {
return imgurAlbum; return imgurAlbum;
} }
/**
 * Matches a whole page body and captures (group 1) the second argument of the
 * widgetFactory.mergeConfig('gallery', ...) call embedded in it.
 * Compiled once, since Pattern instances are immutable and reusable.
 */
private static final Pattern EMBEDDED_JSON_PATTERN =
        Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL);

/**
 * Creates a matcher that looks for the embedded gallery JSON in a page.
 *
 * @param doc
 *      Parsed page whose body HTML is searched.
 * @return
 *      A matcher over the body HTML; when matches() succeeds,
 *      group(1) holds the text captured as the gallery configuration.
 */
private static Matcher getEmbeddedJsonMatcher(Document doc) {
    return EMBEDDED_JSON_PATTERN.matcher(doc.body().html());
}
/**
 * Builds an album for the given URL from a JSON array of image descriptions.
 *
 * @param url
 *      URL the album is created for.
 * @param jsonImages
 *      JSON array whose elements each describe one image.
 * @return
 *      An album containing one image per array element, in array order.
 * @throws MalformedURLException
 *      If an image URL built from the JSON is not a valid URL.
 */
private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException {
    ImgurAlbum album = new ImgurAlbum(url);
    final int count = jsonImages.length();
    int index = 0;
    while (index < count) {
        album.addImage(createImgurImageFromJson(jsonImages.getJSONObject(index)));
        index++;
    }
    return album;
}
/**
 * Creates an image from a single JSON image description.
 *
 * @param json
 *      JSON object describing the image.
 * @return
 *      An image pointing at the URL derived from the JSON.
 * @throws MalformedURLException
 *      If the derived URL is not a valid URL.
 */
private static ImgurImage createImgurImageFromJson(JSONObject json) throws MalformedURLException {
    URL imageUrl = extractImageUrlFromJson(json);
    return new ImgurImage(imageUrl);
}
/**
 * Derives the direct image URL from a JSON image description.
 *
 * The URL is "http://i.imgur.com/" followed by the "hash" and "ext" values
 * of the JSON object. A ".gif" extension is replaced by ".mp4" when the
 * "prefer.mp4" configuration flag is enabled (it defaults to false here).
 *
 * @param json
 *      JSON object providing the "ext" and "hash" strings.
 * @return
 *      The direct URL of the image.
 * @throws MalformedURLException
 *      If the assembled string is not a valid URL.
 */
private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException {
    final String declaredExt = json.getString("ext");
    // The config flag is only consulted for GIFs, exactly as before.
    final boolean swapToMp4 = declaredExt.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false);
    final String extension = swapToMp4 ? ".mp4" : declaredExt;
    final String hash = json.getString("hash");
    return new URL("http://i.imgur.com/" + hash + extension);
}
/**
 * Fetches and parses the page at the given URL.
 *
 * @param strUrl
 *      Address of the page to retrieve.
 * @return
 *      The parsed document.
 * @throws IOException
 *      If the request fails.
 */
private static Document getDocument(String strUrl) throws IOException {
    final int timeoutMillis = 10 * 1000;
    // jsoup treats a maximum body size of 0 as "no limit".
    final int unlimitedBodySize = 0;
    return Jsoup.connect(strUrl)
            .userAgent(USER_AGENT)
            .timeout(timeoutMillis)
            .maxBodySize(unlimitedBodySize)
            .get();
}
/** /**
* Rips all albums in an imgur user's account. * Rips all albums in an imgur user's account.
* @param url * @param url
@ -507,6 +538,13 @@ public class ImgurRipper extends AlbumRipper {
this.url = new URL("http://imgur.com/r/" + subreddit + "/" + gid); this.url = new URL("http://imgur.com/r/" + subreddit + "/" + gid);
return "r_" + subreddit + "_" + gid; return "r_" + subreddit + "_" + gid;
} }
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9]{5,})$");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
// Single imgur image
albumType = ALBUM_TYPE.SINGLE_IMAGE;
return m.group(m.groupCount());
}
p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$"); p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$");
m = p.matcher(url.toExternalForm()); m = p.matcher(url.toExternalForm());
if (m.matches()) { if (m.matches()) {

View File

@ -1,13 +1,13 @@
package com.rarchives.ripme.tst.ripper.rippers; package com.rarchives.ripme.tst.ripper.rippers;
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import com.rarchives.ripme.ripper.rippers.ImgurRipper;
import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum;
public class ImgurRipperTest extends RippersTest { public class ImgurRipperTest extends RippersTest {
public void testImgurURLFailures() throws IOException { public void testImgurURLFailures() throws IOException {
@ -17,7 +17,6 @@ public class ImgurRipperTest extends RippersTest {
failURLs.add(new URL("http://imgur.com/")); failURLs.add(new URL("http://imgur.com/"));
failURLs.add(new URL("http://i.imgur.com")); failURLs.add(new URL("http://i.imgur.com"));
failURLs.add(new URL("http://i.imgur.com/")); failURLs.add(new URL("http://i.imgur.com/"));
failURLs.add(new URL("http://imgur.com/image"));
failURLs.add(new URL("http://imgur.com/image.jpg")); failURLs.add(new URL("http://imgur.com/image.jpg"));
failURLs.add(new URL("http://i.imgur.com/image.jpg")); failURLs.add(new URL("http://i.imgur.com/image.jpg"));
for (URL url : failURLs) { for (URL url : failURLs) {
@ -50,6 +49,15 @@ public class ImgurRipperTest extends RippersTest {
} }
} }
/**
 * Runs the ripper against imgur pages that hold a single image rather than an album.
 */
public void testImgurSingleImage() throws IOException {
    URL[] singleImageUrls = {
        new URL("http://imgur.com/qbfcLyG"), // Single image URL
        new URL("https://imgur.com/KexUO"),  // Single image URL
    };
    for (URL singleImageUrl : singleImageUrls) {
        testRipper(new ImgurRipper(singleImageUrl));
    }
}
public void testImgurAlbumWithMoreThan20Pictures() throws IOException { public void testImgurAlbumWithMoreThan20Pictures() throws IOException {
ImgurAlbum album = ImgurRipper.getImgurAlbum(new URL("http://imgur.com/a/HUMsq")); ImgurAlbum album = ImgurRipper.getImgurAlbum(new URL("http://imgur.com/a/HUMsq"));