From 3716bbfb5414abfb30de77bfb19478b7d569d0e6 Mon Sep 17 00:00:00 2001 From: avesther Date: Mon, 2 Apr 2018 14:19:32 +0200 Subject: [PATCH] Fixes #410 Imgur - Cannot download albums with a single image in --- .../ripme/ripper/rippers/ImgurRipper.java | 138 +++++++++++------- .../tst/ripper/rippers/ImgurRipperTest.java | 16 +- 2 files changed, 100 insertions(+), 54 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java index b595d9e2..fe7937d3 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ImgurRipper.java @@ -36,6 +36,7 @@ public class ImgurRipper extends AlbumRipper { USER, USER_ALBUM, USER_IMAGES, + SINGLE_IMAGE, SERIES_OF_IMAGES, SUBREDDIT } @@ -155,34 +156,48 @@ public class ImgurRipper extends AlbumRipper { @Override public void rip() throws IOException { switch (albumType) { - case ALBUM: - // Fall-through - case USER_ALBUM: - logger.info("Album type is USER_ALBUM"); - // Don't call getAlbumTitle(this.url) with this - // as it seems to cause the album to be downloaded to a subdir. - ripAlbum(this.url); - break; - case SERIES_OF_IMAGES: - logger.info("Album type is SERIES_OF_IMAGES"); - ripAlbum(this.url); - break; - case USER: - logger.info("Album type is USER"); - ripUserAccount(url); - break; - case SUBREDDIT: - logger.info("Album type is SUBREDDIT"); - ripSubreddit(url); - break; - case USER_IMAGES: - logger.info("Album type is USER_IMAGES"); - ripUserImages(url); - break; + case ALBUM: + // Fall-through + case USER_ALBUM: + logger.info("Album type is USER_ALBUM"); + // Don't call getAlbumTitle(this.url) with this + // as it seems to cause the album to be downloaded to a subdir. + ripAlbum(this.url); + break; + case SERIES_OF_IMAGES: + logger.info("Album type is SERIES_OF_IMAGES"); + ripAlbum(this.url); + break; + case SINGLE_IMAGE: + logger.info("Album type is SINGLE_IMAGE"); + ripSingleImage(this.url); + break; + case USER: + logger.info("Album type is USER"); + ripUserAccount(url); + break; + case SUBREDDIT: + logger.info("Album type is SUBREDDIT"); + ripSubreddit(url); + break; + case USER_IMAGES: + logger.info("Album type is USER_IMAGES"); + ripUserImages(url); + break; } waitForThreads(); } + private void ripSingleImage(URL url) throws IOException { + String strUrl = url.toExternalForm(); + Document document = getDocument(strUrl); + Matcher m = getEmbeddedJsonMatcher(document); + if (m.matches()) { + JSONObject json = new JSONObject(m.group(1)).getJSONObject("image"); + addURLToDownload(extractImageUrlFromJson(json), ""); + } + } + private void ripAlbum(URL url) throws IOException { ripAlbum(url, ""); } @@ -257,38 +272,16 @@ public class ImgurRipper extends AlbumRipper { strUrl += "/all"; } logger.info(" Retrieving " + strUrl); - Document doc = Jsoup.connect(strUrl) - .userAgent(USER_AGENT) - .timeout(10 * 1000) - .maxBodySize(0) - .get(); - + Document doc = getDocument(strUrl); // Try to use embedded JSON to retrieve images - Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL); - Matcher m = p.matcher(doc.body().html()); + Matcher m = getEmbeddedJsonMatcher(doc); if (m.matches()) { try { - ImgurAlbum imgurAlbum = new ImgurAlbum(url); JSONObject json = new JSONObject(m.group(1)); - JSONArray images = json.getJSONObject("image") + JSONArray jsonImages = json.getJSONObject("image") .getJSONObject("album_images") .getJSONArray("images"); - int imagesLength = images.length(); - for (int i = 0; i < imagesLength; i++) { - JSONObject image = images.getJSONObject(i); - String ext = image.getString("ext"); - if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) { - ext = ".mp4"; - } - URL imageURL = new URL( - "http://i.imgur.com/" - + image.getString("hash") - + ext); - ImgurImage imgurImage = new ImgurImage(imageURL); - imgurImage.extension = ext; - imgurAlbum.addImage(imgurImage); - } - return imgurAlbum; + return createImgurAlbumFromJsonArray(url, jsonImages); } catch (JSONException e) { logger.debug("Error while parsing JSON at " + url + ", continuing", e); } @@ -330,6 +323,44 @@ public class ImgurRipper extends AlbumRipper { return imgurAlbum; } + private static Matcher getEmbeddedJsonMatcher(Document doc) { + Pattern p = Pattern.compile("^.*widgetFactory.mergeConfig\\('gallery', (.*?)\\);.*$", Pattern.DOTALL); + return p.matcher(doc.body().html()); + } + + private static ImgurAlbum createImgurAlbumFromJsonArray(URL url, JSONArray jsonImages) throws MalformedURLException { + ImgurAlbum imgurAlbum = new ImgurAlbum(url); + int imagesLength = jsonImages.length(); + for (int i = 0; i < imagesLength; i++) { + JSONObject jsonImage = jsonImages.getJSONObject(i); + imgurAlbum.addImage(createImgurImageFromJson(jsonImage)); + } + return imgurAlbum; + } + + private static ImgurImage createImgurImageFromJson(JSONObject json) throws MalformedURLException { + return new ImgurImage(extractImageUrlFromJson(json)); + } + + private static URL extractImageUrlFromJson(JSONObject json) throws MalformedURLException { + String ext = json.getString("ext"); + if (ext.equals(".gif") && Utils.getConfigBoolean("prefer.mp4", false)) { + ext = ".mp4"; + } + return new URL( + "http://i.imgur.com/" + + json.getString("hash") + + ext); + } + + private static Document getDocument(String strUrl) throws IOException { + return Jsoup.connect(strUrl) + .userAgent(USER_AGENT) + .timeout(10 * 1000) + .maxBodySize(0) + .get(); + } + /** * Rips all albums in an imgur user's account. * @param url @@ -507,6 +538,13 @@ public class ImgurRipper extends AlbumRipper { this.url = new URL("http://imgur.com/r/" + subreddit + "/" + gid); return "r_" + subreddit + "_" + gid; } + p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9]{5,})$"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + // Single imgur image + albumType = ALBUM_TYPE.SINGLE_IMAGE; + return m.group(m.groupCount()); + } p = Pattern.compile("^https?://(i\\.|www\\.|m\\.)?imgur\\.com/([a-zA-Z0-9,]{5,}).*$"); m = p.matcher(url.toExternalForm()); if (m.matches()) { diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java index 46f5679f..c321a99e 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ImgurRipperTest.java @@ -1,13 +1,13 @@ package com.rarchives.ripme.tst.ripper.rippers; +import com.rarchives.ripme.ripper.rippers.ImgurRipper; +import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; + import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.List; -import com.rarchives.ripme.ripper.rippers.ImgurRipper; -import com.rarchives.ripme.ripper.rippers.ImgurRipper.ImgurAlbum; - public class ImgurRipperTest extends RippersTest { public void testImgurURLFailures() throws IOException { @@ -17,7 +17,6 @@ public class ImgurRipperTest extends RippersTest { failURLs.add(new URL("http://imgur.com/")); failURLs.add(new URL("http://i.imgur.com")); failURLs.add(new URL("http://i.imgur.com/")); - failURLs.add(new URL("http://imgur.com/image")); failURLs.add(new URL("http://imgur.com/image.jpg")); failURLs.add(new URL("http://i.imgur.com/image.jpg")); for (URL url : failURLs) { @@ -50,6 +49,15 @@ public class ImgurRipperTest extends RippersTest { } } + public void testImgurSingleImage() throws IOException { + List contentURLs = new ArrayList<>(); + contentURLs.add(new URL("http://imgur.com/qbfcLyG")); // Single image URL + contentURLs.add(new URL("https://imgur.com/KexUO")); // Single image URL + for (URL url : contentURLs) { + ImgurRipper ripper = new ImgurRipper(url); + testRipper(ripper); + } + } public void testImgurAlbumWithMoreThan20Pictures() throws IOException { ImgurAlbum album = ImgurRipper.getImgurAlbum(new URL("http://imgur.com/a/HUMsq"));